From af012a75460019ee829209c7a721ba490af2fe29 Mon Sep 17 00:00:00 2001 From: csgulati09 Date: Fri, 7 Jun 2024 13:52:38 +0530 Subject: [PATCH] feat: vendoring of openai --- Makefile | 15 +- portkey_ai/__init__.py | 2 + portkey_ai/_vendor/__init__.py | 0 portkey_ai/_vendor/bin/openai | 8 + .../_vendor/openai-1.26.0.dist-info/INSTALLER | 1 + .../_vendor/openai-1.26.0.dist-info/METADATA | 677 ++++ .../_vendor/openai-1.26.0.dist-info/RECORD | 530 +++ .../_vendor/openai-1.26.0.dist-info/REQUESTED | 0 .../_vendor/openai-1.26.0.dist-info/WHEEL | 4 + .../openai-1.26.0.dist-info/entry_points.txt | 2 + .../openai-1.26.0.dist-info/licenses/LICENSE | 201 + portkey_ai/_vendor/openai/__init__.py | 370 ++ portkey_ai/_vendor/openai/__main__.py | 3 + portkey_ai/_vendor/openai/_base_client.py | 2107 ++++++++++ portkey_ai/_vendor/openai/_client.py | 575 +++ portkey_ai/_vendor/openai/_compat.py | 235 ++ portkey_ai/_vendor/openai/_constants.py | 16 + portkey_ai/_vendor/openai/_exceptions.py | 161 + portkey_ai/_vendor/openai/_extras/__init__.py | 2 + portkey_ai/_vendor/openai/_extras/_common.py | 21 + .../_vendor/openai/_extras/numpy_proxy.py | 37 + .../_vendor/openai/_extras/pandas_proxy.py | 28 + portkey_ai/_vendor/openai/_files.py | 142 + portkey_ai/_vendor/openai/_legacy_response.py | 499 +++ portkey_ai/_vendor/openai/_models.py | 755 ++++ portkey_ai/_vendor/openai/_module_client.py | 85 + portkey_ai/_vendor/openai/_qs.py | 164 + portkey_ai/_vendor/openai/_resource.py | 43 + portkey_ai/_vendor/openai/_response.py | 894 +++++ portkey_ai/_vendor/openai/_streaming.py | 438 +++ portkey_ai/_vendor/openai/_types.py | 233 ++ portkey_ai/_vendor/openai/_utils/__init__.py | 54 + portkey_ai/_vendor/openai/_utils/_logs.py | 25 + portkey_ai/_vendor/openai/_utils/_proxy.py | 63 + portkey_ai/_vendor/openai/_utils/_streams.py | 12 + portkey_ai/_vendor/openai/_utils/_sync.py | 68 + .../_vendor/openai/_utils/_transform.py | 416 ++ portkey_ai/_vendor/openai/_utils/_typing.py | 125 + portkey_ai/_vendor/openai/_utils/_utils.py | 414 ++ portkey_ai/_vendor/openai/_version.py | 4 + portkey_ai/_vendor/openai/cli/__init__.py | 1 + .../_vendor/openai/cli/_api/__init__.py | 1 + portkey_ai/_vendor/openai/cli/_api/_main.py | 16 + portkey_ai/_vendor/openai/cli/_api/audio.py | 94 + .../_vendor/openai/cli/_api/chat/__init__.py | 13 + .../openai/cli/_api/chat/completions.py | 176 + .../_vendor/openai/cli/_api/completions.py | 185 + portkey_ai/_vendor/openai/cli/_api/files.py | 80 + portkey_ai/_vendor/openai/cli/_api/image.py | 149 + portkey_ai/_vendor/openai/cli/_api/models.py | 45 + portkey_ai/_vendor/openai/cli/_cli.py | 238 ++ portkey_ai/_vendor/openai/cli/_errors.py | 25 + portkey_ai/_vendor/openai/cli/_models.py | 19 + portkey_ai/_vendor/openai/cli/_progress.py | 59 + .../_vendor/openai/cli/_tools/__init__.py | 1 + portkey_ai/_vendor/openai/cli/_tools/_main.py | 21 + .../_vendor/openai/cli/_tools/fine_tunes.py | 63 + .../_vendor/openai/cli/_tools/migrate.py | 185 + portkey_ai/_vendor/openai/cli/_utils.py | 45 + portkey_ai/_vendor/openai/pagination.py | 107 + portkey_ai/_vendor/openai/py.typed | 0 .../_vendor/openai/resources/__init__.py | 159 + .../openai/resources/audio/__init__.py | 61 + .../_vendor/openai/resources/audio/audio.py | 144 + .../_vendor/openai/resources/audio/speech.py | 221 ++ .../openai/resources/audio/transcriptions.py | 280 ++ .../openai/resources/audio/translations.py | 246 ++ .../_vendor/openai/resources/batches.py | 501 +++ .../_vendor/openai/resources/beta/__init__.py | 61 + 
.../openai/resources/beta/assistants.py | 937 +++++ .../_vendor/openai/resources/beta/beta.py | 146 + .../openai/resources/beta/threads/__init__.py | 47 + .../openai/resources/beta/threads/messages.py | 707 ++++ .../resources/beta/threads/runs/__init__.py | 33 + .../resources/beta/threads/runs/runs.py | 3458 +++++++++++++++++ .../resources/beta/threads/runs/steps.py | 340 ++ .../openai/resources/beta/threads/threads.py | 2274 +++++++++++ .../resources/beta/vector_stores/__init__.py | 47 + .../beta/vector_stores/file_batches.py | 800 ++++ .../resources/beta/vector_stores/files.py | 736 ++++ .../beta/vector_stores/vector_stores.py | 729 ++++ .../_vendor/openai/resources/chat/__init__.py | 33 + .../_vendor/openai/resources/chat/chat.py | 80 + .../openai/resources/chat/completions.py | 1272 ++++++ .../_vendor/openai/resources/completions.py | 1158 ++++++ .../_vendor/openai/resources/embeddings.py | 276 ++ portkey_ai/_vendor/openai/resources/files.py | 747 ++++ .../openai/resources/fine_tuning/__init__.py | 33 + .../resources/fine_tuning/fine_tuning.py | 81 + .../resources/fine_tuning/jobs/__init__.py | 33 + .../resources/fine_tuning/jobs/checkpoints.py | 188 + .../openai/resources/fine_tuning/jobs/jobs.py | 725 ++++ portkey_ai/_vendor/openai/resources/images.py | 635 +++ portkey_ai/_vendor/openai/resources/models.py | 310 ++ .../_vendor/openai/resources/moderations.py | 189 + portkey_ai/_vendor/openai/types/__init__.py | 43 + .../_vendor/openai/types/audio/__init__.py | 13 + .../types/audio/speech_create_params.py | 39 + .../openai/types/audio/transcription.py | 11 + .../audio/transcription_create_params.py | 65 + .../_vendor/openai/types/audio/translation.py | 10 + .../types/audio/translation_create_params.py | 48 + portkey_ai/_vendor/openai/types/batch.py | 92 + .../openai/types/batch_create_params.py | 36 + .../_vendor/openai/types/batch_error.py | 21 + .../_vendor/openai/types/batch_list_params.py | 24 + .../openai/types/batch_request_counts.py | 17 + .../_vendor/openai/types/beta/__init__.py | 64 + .../_vendor/openai/types/beta/assistant.py | 131 + .../types/beta/assistant_create_params.py | 173 + .../openai/types/beta/assistant_deleted.py | 15 + .../types/beta/assistant_list_params.py | 39 + .../types/beta/assistant_response_format.py | 13 + .../beta/assistant_response_format_option.py | 10 + .../assistant_response_format_option_param.py | 14 + .../beta/assistant_response_format_param.py | 12 + .../types/beta/assistant_stream_event.py | 279 ++ .../openai/types/beta/assistant_tool.py | 16 + .../types/beta/assistant_tool_choice.py | 16 + .../beta/assistant_tool_choice_function.py | 11 + .../assistant_tool_choice_function_param.py | 12 + .../beta/assistant_tool_choice_option.py | 12 + .../assistant_tool_choice_option_param.py | 14 + .../types/beta/assistant_tool_choice_param.py | 16 + .../openai/types/beta/assistant_tool_param.py | 15 + .../types/beta/assistant_update_params.py | 124 + .../openai/types/beta/chat/__init__.py | 3 + .../types/beta/code_interpreter_tool.py | 12 + .../types/beta/code_interpreter_tool_param.py | 12 + .../openai/types/beta/file_search_tool.py | 12 + .../types/beta/file_search_tool_param.py | 12 + .../openai/types/beta/function_tool.py | 15 + .../openai/types/beta/function_tool_param.py | 16 + .../_vendor/openai/types/beta/thread.py | 65 + .../beta/thread_create_and_run_params.py | 350 ++ .../openai/types/beta/thread_create_params.py | 130 + .../openai/types/beta/thread_deleted.py | 15 + .../openai/types/beta/thread_update_params.py | 56 + 
.../openai/types/beta/threads/__init__.py | 42 + .../openai/types/beta/threads/annotation.py | 15 + .../types/beta/threads/annotation_delta.py | 15 + .../beta/threads/file_citation_annotation.py | 29 + .../threads/file_citation_delta_annotation.py | 33 + .../beta/threads/file_path_annotation.py | 26 + .../threads/file_path_delta_annotation.py | 30 + .../openai/types/beta/threads/image_file.py | 14 + .../beta/threads/image_file_content_block.py | 15 + .../types/beta/threads/image_file_delta.py | 15 + .../beta/threads/image_file_delta_block.py | 19 + .../openai/types/beta/threads/message.py | 91 + .../types/beta/threads/message_content.py | 14 + .../beta/threads/message_content_delta.py | 14 + .../beta/threads/message_create_params.py | 47 + .../types/beta/threads/message_deleted.py | 15 + .../types/beta/threads/message_delta.py | 17 + .../types/beta/threads/message_delta_event.py | 19 + .../types/beta/threads/message_list_params.py | 42 + .../beta/threads/message_update_params.py | 20 + .../required_action_function_tool_call.py | 34 + .../_vendor/openai/types/beta/threads/run.py | 230 ++ .../types/beta/threads/run_create_params.py | 237 ++ .../types/beta/threads/run_list_params.py | 39 + .../openai/types/beta/threads/run_status.py | 16 + .../threads/run_submit_tool_outputs_params.py | 54 + .../types/beta/threads/run_update_params.py | 20 + .../types/beta/threads/runs/__init__.py | 34 + .../threads/runs/code_interpreter_logs.py | 19 + .../runs/code_interpreter_output_image.py | 26 + .../runs/code_interpreter_tool_call.py | 71 + .../runs/code_interpreter_tool_call_delta.py | 45 + .../threads/runs/file_search_tool_call.py | 21 + .../runs/file_search_tool_call_delta.py | 25 + .../beta/threads/runs/function_tool_call.py | 38 + .../threads/runs/function_tool_call_delta.py | 41 + .../runs/message_creation_step_details.py | 19 + .../types/beta/threads/runs/run_step.py | 113 + .../types/beta/threads/runs/run_step_delta.py | 21 + .../beta/threads/runs/run_step_delta_event.py | 19 + .../runs/run_step_delta_message_delta.py | 20 + .../beta/threads/runs/step_list_params.py | 41 + .../types/beta/threads/runs/tool_call.py | 16 + .../beta/threads/runs/tool_call_delta.py | 16 + .../threads/runs/tool_call_delta_object.py | 21 + .../threads/runs/tool_calls_step_details.py | 21 + .../_vendor/openai/types/beta/threads/text.py | 15 + .../types/beta/threads/text_content_block.py | 15 + .../openai/types/beta/threads/text_delta.py | 15 + .../types/beta/threads/text_delta_block.py | 19 + .../_vendor/openai/types/beta/vector_store.py | 79 + .../types/beta/vector_store_create_params.py | 42 + .../openai/types/beta/vector_store_deleted.py | 15 + .../types/beta/vector_store_list_params.py | 39 + .../types/beta/vector_store_update_params.py | 35 + .../types/beta/vector_stores/__init__.py | 13 + .../vector_stores/file_batch_create_params.py | 17 + .../file_batch_list_files_params.py | 47 + .../beta/vector_stores/file_create_params.py | 16 + .../beta/vector_stores/file_list_params.py | 45 + .../beta/vector_stores/vector_store_file.py | 56 + .../vector_stores/vector_store_file_batch.py | 54 + .../vector_store_file_deleted.py | 15 + .../_vendor/openai/types/chat/__init__.py | 60 + .../openai/types/chat/chat_completion.py | 69 + ...chat_completion_assistant_message_param.py | 51 + .../types/chat/chat_completion_chunk.py | 140 + ...hat_completion_content_part_image_param.py | 26 + .../chat_completion_content_part_param.py | 16 + ...chat_completion_content_part_text_param.py | 15 + ...t_completion_function_call_option_param.py 
| 12 + .../chat_completion_function_message_param.py | 19 + .../types/chat/chat_completion_message.py | 40 + .../chat/chat_completion_message_param.py | 21 + .../chat/chat_completion_message_tool_call.py | 31 + ...chat_completion_message_tool_call_param.py | 31 + ...chat_completion_named_tool_choice_param.py | 19 + .../openai/types/chat/chat_completion_role.py | 7 + .../chat_completion_stream_options_param.py | 17 + .../chat_completion_system_message_param.py | 22 + .../chat/chat_completion_token_logprob.py | 57 + ...hat_completion_tool_choice_option_param.py | 14 + .../chat_completion_tool_message_param.py | 18 + .../types/chat/chat_completion_tool_param.py | 16 + .../chat_completion_user_message_param.py | 25 + .../types/chat/completion_create_params.py | 267 ++ portkey_ai/_vendor/openai/types/chat_model.py | 27 + portkey_ai/_vendor/openai/types/completion.py | 37 + .../_vendor/openai/types/completion_choice.py | 35 + .../openai/types/completion_create_params.py | 197 + .../_vendor/openai/types/completion_usage.py | 17 + .../openai/types/create_embedding_response.py | 31 + portkey_ai/_vendor/openai/types/embedding.py | 23 + .../openai/types/embedding_create_params.py | 59 + .../_vendor/openai/types/file_content.py | 6 + .../openai/types/file_create_params.py | 25 + .../_vendor/openai/types/file_deleted.py | 15 + .../_vendor/openai/types/file_list_params.py | 12 + .../_vendor/openai/types/file_object.py | 48 + .../openai/types/fine_tuning/__init__.py | 18 + .../types/fine_tuning/fine_tuning_job.py | 124 + .../fine_tuning/fine_tuning_job_event.py | 19 + .../fine_tuning_job_integration.py | 8 + .../fine_tuning_job_wandb_integration.py | 33 + ...ine_tuning_job_wandb_integration_object.py | 21 + .../types/fine_tuning/job_create_params.py | 131 + .../fine_tuning/job_list_events_params.py | 15 + .../types/fine_tuning/job_list_params.py | 15 + .../openai/types/fine_tuning/jobs/__init__.py | 8 + .../jobs/checkpoint_list_params.py | 15 + .../jobs/fine_tuning_job_checkpoint.py | 47 + portkey_ai/_vendor/openai/types/image.py | 24 + .../types/image_create_variation_params.py | 50 + .../_vendor/openai/types/image_edit_params.py | 61 + .../openai/types/image_generate_params.py | 63 + .../_vendor/openai/types/images_response.py | 14 + portkey_ai/_vendor/openai/types/model.py | 21 + .../_vendor/openai/types/model_deleted.py | 14 + portkey_ai/_vendor/openai/types/moderation.py | 118 + .../openai/types/moderation_create_params.py | 25 + .../types/moderation_create_response.py | 19 + .../_vendor/openai/types/shared/__init__.py | 5 + .../openai/types/shared/error_object.py | 17 + .../types/shared/function_definition.py | 35 + .../types/shared/function_parameters.py | 7 + .../openai/types/shared_params/__init__.py | 4 + .../shared_params/function_definition.py | 36 + .../shared_params/function_parameters.py | 9 + portkey_ai/_vendor/openai/version.py | 3 + portkey_ai/api_resources/apis/audio.py | 2 +- portkey_ai/api_resources/apis/batches.py | 2 +- .../api_resources/apis/chat_complete.py | 2 +- portkey_ai/api_resources/apis/complete.py | 2 +- portkey_ai/api_resources/apis/embeddings.py | 2 +- portkey_ai/api_resources/apis/fine_tuning.py | 4 +- portkey_ai/api_resources/apis/images.py | 2 +- portkey_ai/api_resources/apis/models.py | 2 +- portkey_ai/api_resources/apis/moderations.py | 2 +- portkey_ai/api_resources/apis/threads.py | 14 +- .../api_resources/apis/vector_stores.py | 4 +- portkey_ai/api_resources/client.py | 3 +- .../api_resources/types/batches_type.py | 4 +- 
.../api_resources/types/fine_tuning_type.py | 2 +- .../api_resources/types/moderations_type.py | 2 +- vendorize.toml | 4 + 283 files changed, 35594 insertions(+), 31 deletions(-) create mode 100644 portkey_ai/_vendor/__init__.py create mode 100755 portkey_ai/_vendor/bin/openai create mode 100644 portkey_ai/_vendor/openai-1.26.0.dist-info/INSTALLER create mode 100644 portkey_ai/_vendor/openai-1.26.0.dist-info/METADATA create mode 100644 portkey_ai/_vendor/openai-1.26.0.dist-info/RECORD create mode 100644 portkey_ai/_vendor/openai-1.26.0.dist-info/REQUESTED create mode 100644 portkey_ai/_vendor/openai-1.26.0.dist-info/WHEEL create mode 100644 portkey_ai/_vendor/openai-1.26.0.dist-info/entry_points.txt create mode 100644 portkey_ai/_vendor/openai-1.26.0.dist-info/licenses/LICENSE create mode 100644 portkey_ai/_vendor/openai/__init__.py create mode 100644 portkey_ai/_vendor/openai/__main__.py create mode 100644 portkey_ai/_vendor/openai/_base_client.py create mode 100644 portkey_ai/_vendor/openai/_client.py create mode 100644 portkey_ai/_vendor/openai/_compat.py create mode 100644 portkey_ai/_vendor/openai/_constants.py create mode 100644 portkey_ai/_vendor/openai/_exceptions.py create mode 100644 portkey_ai/_vendor/openai/_extras/__init__.py create mode 100644 portkey_ai/_vendor/openai/_extras/_common.py create mode 100644 portkey_ai/_vendor/openai/_extras/numpy_proxy.py create mode 100644 portkey_ai/_vendor/openai/_extras/pandas_proxy.py create mode 100644 portkey_ai/_vendor/openai/_files.py create mode 100644 portkey_ai/_vendor/openai/_legacy_response.py create mode 100644 portkey_ai/_vendor/openai/_models.py create mode 100644 portkey_ai/_vendor/openai/_module_client.py create mode 100644 portkey_ai/_vendor/openai/_qs.py create mode 100644 portkey_ai/_vendor/openai/_resource.py create mode 100644 portkey_ai/_vendor/openai/_response.py create mode 100644 portkey_ai/_vendor/openai/_streaming.py create mode 100644 portkey_ai/_vendor/openai/_types.py create mode 100644 portkey_ai/_vendor/openai/_utils/__init__.py create mode 100644 portkey_ai/_vendor/openai/_utils/_logs.py create mode 100644 portkey_ai/_vendor/openai/_utils/_proxy.py create mode 100644 portkey_ai/_vendor/openai/_utils/_streams.py create mode 100644 portkey_ai/_vendor/openai/_utils/_sync.py create mode 100644 portkey_ai/_vendor/openai/_utils/_transform.py create mode 100644 portkey_ai/_vendor/openai/_utils/_typing.py create mode 100644 portkey_ai/_vendor/openai/_utils/_utils.py create mode 100644 portkey_ai/_vendor/openai/_version.py create mode 100644 portkey_ai/_vendor/openai/cli/__init__.py create mode 100644 portkey_ai/_vendor/openai/cli/_api/__init__.py create mode 100644 portkey_ai/_vendor/openai/cli/_api/_main.py create mode 100644 portkey_ai/_vendor/openai/cli/_api/audio.py create mode 100644 portkey_ai/_vendor/openai/cli/_api/chat/__init__.py create mode 100644 portkey_ai/_vendor/openai/cli/_api/chat/completions.py create mode 100644 portkey_ai/_vendor/openai/cli/_api/completions.py create mode 100644 portkey_ai/_vendor/openai/cli/_api/files.py create mode 100644 portkey_ai/_vendor/openai/cli/_api/image.py create mode 100644 portkey_ai/_vendor/openai/cli/_api/models.py create mode 100644 portkey_ai/_vendor/openai/cli/_cli.py create mode 100644 portkey_ai/_vendor/openai/cli/_errors.py create mode 100644 portkey_ai/_vendor/openai/cli/_models.py create mode 100644 portkey_ai/_vendor/openai/cli/_progress.py create mode 100644 portkey_ai/_vendor/openai/cli/_tools/__init__.py create mode 100644 
portkey_ai/_vendor/openai/cli/_tools/_main.py create mode 100644 portkey_ai/_vendor/openai/cli/_tools/fine_tunes.py create mode 100644 portkey_ai/_vendor/openai/cli/_tools/migrate.py create mode 100644 portkey_ai/_vendor/openai/cli/_utils.py create mode 100644 portkey_ai/_vendor/openai/pagination.py create mode 100644 portkey_ai/_vendor/openai/py.typed create mode 100644 portkey_ai/_vendor/openai/resources/__init__.py create mode 100644 portkey_ai/_vendor/openai/resources/audio/__init__.py create mode 100644 portkey_ai/_vendor/openai/resources/audio/audio.py create mode 100644 portkey_ai/_vendor/openai/resources/audio/speech.py create mode 100644 portkey_ai/_vendor/openai/resources/audio/transcriptions.py create mode 100644 portkey_ai/_vendor/openai/resources/audio/translations.py create mode 100644 portkey_ai/_vendor/openai/resources/batches.py create mode 100644 portkey_ai/_vendor/openai/resources/beta/__init__.py create mode 100644 portkey_ai/_vendor/openai/resources/beta/assistants.py create mode 100644 portkey_ai/_vendor/openai/resources/beta/beta.py create mode 100644 portkey_ai/_vendor/openai/resources/beta/threads/__init__.py create mode 100644 portkey_ai/_vendor/openai/resources/beta/threads/messages.py create mode 100644 portkey_ai/_vendor/openai/resources/beta/threads/runs/__init__.py create mode 100644 portkey_ai/_vendor/openai/resources/beta/threads/runs/runs.py create mode 100644 portkey_ai/_vendor/openai/resources/beta/threads/runs/steps.py create mode 100644 portkey_ai/_vendor/openai/resources/beta/threads/threads.py create mode 100644 portkey_ai/_vendor/openai/resources/beta/vector_stores/__init__.py create mode 100644 portkey_ai/_vendor/openai/resources/beta/vector_stores/file_batches.py create mode 100644 portkey_ai/_vendor/openai/resources/beta/vector_stores/files.py create mode 100644 portkey_ai/_vendor/openai/resources/beta/vector_stores/vector_stores.py create mode 100644 portkey_ai/_vendor/openai/resources/chat/__init__.py create mode 100644 portkey_ai/_vendor/openai/resources/chat/chat.py create mode 100644 portkey_ai/_vendor/openai/resources/chat/completions.py create mode 100644 portkey_ai/_vendor/openai/resources/completions.py create mode 100644 portkey_ai/_vendor/openai/resources/embeddings.py create mode 100644 portkey_ai/_vendor/openai/resources/files.py create mode 100644 portkey_ai/_vendor/openai/resources/fine_tuning/__init__.py create mode 100644 portkey_ai/_vendor/openai/resources/fine_tuning/fine_tuning.py create mode 100644 portkey_ai/_vendor/openai/resources/fine_tuning/jobs/__init__.py create mode 100644 portkey_ai/_vendor/openai/resources/fine_tuning/jobs/checkpoints.py create mode 100644 portkey_ai/_vendor/openai/resources/fine_tuning/jobs/jobs.py create mode 100644 portkey_ai/_vendor/openai/resources/images.py create mode 100644 portkey_ai/_vendor/openai/resources/models.py create mode 100644 portkey_ai/_vendor/openai/resources/moderations.py create mode 100644 portkey_ai/_vendor/openai/types/__init__.py create mode 100644 portkey_ai/_vendor/openai/types/audio/__init__.py create mode 100644 portkey_ai/_vendor/openai/types/audio/speech_create_params.py create mode 100644 portkey_ai/_vendor/openai/types/audio/transcription.py create mode 100644 portkey_ai/_vendor/openai/types/audio/transcription_create_params.py create mode 100644 portkey_ai/_vendor/openai/types/audio/translation.py create mode 100644 portkey_ai/_vendor/openai/types/audio/translation_create_params.py create mode 100644 portkey_ai/_vendor/openai/types/batch.py create mode 100644 
portkey_ai/_vendor/openai/types/batch_create_params.py create mode 100644 portkey_ai/_vendor/openai/types/batch_error.py create mode 100644 portkey_ai/_vendor/openai/types/batch_list_params.py create mode 100644 portkey_ai/_vendor/openai/types/batch_request_counts.py create mode 100644 portkey_ai/_vendor/openai/types/beta/__init__.py create mode 100644 portkey_ai/_vendor/openai/types/beta/assistant.py create mode 100644 portkey_ai/_vendor/openai/types/beta/assistant_create_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/assistant_deleted.py create mode 100644 portkey_ai/_vendor/openai/types/beta/assistant_list_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/assistant_response_format.py create mode 100644 portkey_ai/_vendor/openai/types/beta/assistant_response_format_option.py create mode 100644 portkey_ai/_vendor/openai/types/beta/assistant_response_format_option_param.py create mode 100644 portkey_ai/_vendor/openai/types/beta/assistant_response_format_param.py create mode 100644 portkey_ai/_vendor/openai/types/beta/assistant_stream_event.py create mode 100644 portkey_ai/_vendor/openai/types/beta/assistant_tool.py create mode 100644 portkey_ai/_vendor/openai/types/beta/assistant_tool_choice.py create mode 100644 portkey_ai/_vendor/openai/types/beta/assistant_tool_choice_function.py create mode 100644 portkey_ai/_vendor/openai/types/beta/assistant_tool_choice_function_param.py create mode 100644 portkey_ai/_vendor/openai/types/beta/assistant_tool_choice_option.py create mode 100644 portkey_ai/_vendor/openai/types/beta/assistant_tool_choice_option_param.py create mode 100644 portkey_ai/_vendor/openai/types/beta/assistant_tool_choice_param.py create mode 100644 portkey_ai/_vendor/openai/types/beta/assistant_tool_param.py create mode 100644 portkey_ai/_vendor/openai/types/beta/assistant_update_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/chat/__init__.py create mode 100644 portkey_ai/_vendor/openai/types/beta/code_interpreter_tool.py create mode 100644 portkey_ai/_vendor/openai/types/beta/code_interpreter_tool_param.py create mode 100644 portkey_ai/_vendor/openai/types/beta/file_search_tool.py create mode 100644 portkey_ai/_vendor/openai/types/beta/file_search_tool_param.py create mode 100644 portkey_ai/_vendor/openai/types/beta/function_tool.py create mode 100644 portkey_ai/_vendor/openai/types/beta/function_tool_param.py create mode 100644 portkey_ai/_vendor/openai/types/beta/thread.py create mode 100644 portkey_ai/_vendor/openai/types/beta/thread_create_and_run_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/thread_create_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/thread_deleted.py create mode 100644 portkey_ai/_vendor/openai/types/beta/thread_update_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/__init__.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/annotation.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/annotation_delta.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/file_citation_annotation.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/file_citation_delta_annotation.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/file_path_annotation.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/file_path_delta_annotation.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/image_file.py create mode 100644 
portkey_ai/_vendor/openai/types/beta/threads/image_file_content_block.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/image_file_delta.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/image_file_delta_block.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/message.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/message_content.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/message_content_delta.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/message_create_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/message_deleted.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/message_delta.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/message_delta_event.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/message_list_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/message_update_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/required_action_function_tool_call.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/run.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/run_create_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/run_list_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/run_status.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/run_submit_tool_outputs_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/run_update_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/runs/__init__.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/runs/code_interpreter_logs.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/runs/code_interpreter_output_image.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/runs/code_interpreter_tool_call.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/runs/file_search_tool_call.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/runs/file_search_tool_call_delta.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/runs/function_tool_call.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/runs/function_tool_call_delta.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/runs/message_creation_step_details.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/runs/run_step.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/runs/run_step_delta.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/runs/run_step_delta_event.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/runs/run_step_delta_message_delta.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/runs/step_list_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/runs/tool_call.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/runs/tool_call_delta.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/runs/tool_call_delta_object.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/runs/tool_calls_step_details.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/text.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/text_content_block.py create 
mode 100644 portkey_ai/_vendor/openai/types/beta/threads/text_delta.py create mode 100644 portkey_ai/_vendor/openai/types/beta/threads/text_delta_block.py create mode 100644 portkey_ai/_vendor/openai/types/beta/vector_store.py create mode 100644 portkey_ai/_vendor/openai/types/beta/vector_store_create_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/vector_store_deleted.py create mode 100644 portkey_ai/_vendor/openai/types/beta/vector_store_list_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/vector_store_update_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/vector_stores/__init__.py create mode 100644 portkey_ai/_vendor/openai/types/beta/vector_stores/file_batch_create_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/vector_stores/file_batch_list_files_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/vector_stores/file_create_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/vector_stores/file_list_params.py create mode 100644 portkey_ai/_vendor/openai/types/beta/vector_stores/vector_store_file.py create mode 100644 portkey_ai/_vendor/openai/types/beta/vector_stores/vector_store_file_batch.py create mode 100644 portkey_ai/_vendor/openai/types/beta/vector_stores/vector_store_file_deleted.py create mode 100644 portkey_ai/_vendor/openai/types/chat/__init__.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion_assistant_message_param.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion_chunk.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion_content_part_image_param.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion_content_part_param.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion_content_part_text_param.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion_function_call_option_param.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion_function_message_param.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion_message.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion_message_param.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion_message_tool_call.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion_message_tool_call_param.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion_named_tool_choice_param.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion_role.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion_stream_options_param.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion_system_message_param.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion_token_logprob.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion_tool_choice_option_param.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion_tool_message_param.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion_tool_param.py create mode 100644 portkey_ai/_vendor/openai/types/chat/chat_completion_user_message_param.py create mode 100644 portkey_ai/_vendor/openai/types/chat/completion_create_params.py create mode 100644 portkey_ai/_vendor/openai/types/chat_model.py create mode 100644 portkey_ai/_vendor/openai/types/completion.py create mode 
100644 portkey_ai/_vendor/openai/types/completion_choice.py create mode 100644 portkey_ai/_vendor/openai/types/completion_create_params.py create mode 100644 portkey_ai/_vendor/openai/types/completion_usage.py create mode 100644 portkey_ai/_vendor/openai/types/create_embedding_response.py create mode 100644 portkey_ai/_vendor/openai/types/embedding.py create mode 100644 portkey_ai/_vendor/openai/types/embedding_create_params.py create mode 100644 portkey_ai/_vendor/openai/types/file_content.py create mode 100644 portkey_ai/_vendor/openai/types/file_create_params.py create mode 100644 portkey_ai/_vendor/openai/types/file_deleted.py create mode 100644 portkey_ai/_vendor/openai/types/file_list_params.py create mode 100644 portkey_ai/_vendor/openai/types/file_object.py create mode 100644 portkey_ai/_vendor/openai/types/fine_tuning/__init__.py create mode 100644 portkey_ai/_vendor/openai/types/fine_tuning/fine_tuning_job.py create mode 100644 portkey_ai/_vendor/openai/types/fine_tuning/fine_tuning_job_event.py create mode 100644 portkey_ai/_vendor/openai/types/fine_tuning/fine_tuning_job_integration.py create mode 100644 portkey_ai/_vendor/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py create mode 100644 portkey_ai/_vendor/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py create mode 100644 portkey_ai/_vendor/openai/types/fine_tuning/job_create_params.py create mode 100644 portkey_ai/_vendor/openai/types/fine_tuning/job_list_events_params.py create mode 100644 portkey_ai/_vendor/openai/types/fine_tuning/job_list_params.py create mode 100644 portkey_ai/_vendor/openai/types/fine_tuning/jobs/__init__.py create mode 100644 portkey_ai/_vendor/openai/types/fine_tuning/jobs/checkpoint_list_params.py create mode 100644 portkey_ai/_vendor/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py create mode 100644 portkey_ai/_vendor/openai/types/image.py create mode 100644 portkey_ai/_vendor/openai/types/image_create_variation_params.py create mode 100644 portkey_ai/_vendor/openai/types/image_edit_params.py create mode 100644 portkey_ai/_vendor/openai/types/image_generate_params.py create mode 100644 portkey_ai/_vendor/openai/types/images_response.py create mode 100644 portkey_ai/_vendor/openai/types/model.py create mode 100644 portkey_ai/_vendor/openai/types/model_deleted.py create mode 100644 portkey_ai/_vendor/openai/types/moderation.py create mode 100644 portkey_ai/_vendor/openai/types/moderation_create_params.py create mode 100644 portkey_ai/_vendor/openai/types/moderation_create_response.py create mode 100644 portkey_ai/_vendor/openai/types/shared/__init__.py create mode 100644 portkey_ai/_vendor/openai/types/shared/error_object.py create mode 100644 portkey_ai/_vendor/openai/types/shared/function_definition.py create mode 100644 portkey_ai/_vendor/openai/types/shared/function_parameters.py create mode 100644 portkey_ai/_vendor/openai/types/shared_params/__init__.py create mode 100644 portkey_ai/_vendor/openai/types/shared_params/function_definition.py create mode 100644 portkey_ai/_vendor/openai/types/shared_params/function_parameters.py create mode 100644 portkey_ai/_vendor/openai/version.py create mode 100644 vendorize.toml diff --git a/Makefile b/Makefile index 2f482ada..95cd990b 100644 --- a/Makefile +++ b/Makefile @@ -1,24 +1,25 @@ GIT_ROOT ?= $(shell git rev-parse --show-toplevel) +EXCLUDE_DIRS := portkey_ai/_vendor,venv help: ## Show all Makefile targets @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf 
"\033[33m%-30s\033[0m %s\n", $$1, $$2}' .PHONY: format lint format: ## Run code formatter: black black . - ruff check . --fix + ruff check . --fix --exclude $(EXCLUDE_DIRS) lint: ## Run linters: mypy, black, ruff - mypy . - black . --check - ruff check . + mypy . --exclude portkey_ai/_vendor --exclude venv + black . --check --exclude portkey_ai/_vendor --exclude venv + ruff check . --exclude $(EXCLUDE_DIRS) test: ## Run tests pytest tests watch-docs: ## Build and watch documentation sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ build: - mypy . - black . --check - ruff check . + mypy . --exclude portkey_ai/_vendor --exclude venv + black . --check --exclude portkey_ai/_vendor --exclude venv + ruff check . --exclude $(EXCLUDE_DIRS) rm -rf dist/ build/ python -m pip install build python -m build . diff --git a/portkey_ai/__init__.py b/portkey_ai/__init__.py index c876d789..2f325abf 100644 --- a/portkey_ai/__init__.py +++ b/portkey_ai/__init__.py @@ -1,5 +1,6 @@ import os from typing import Mapping, Optional, Union +from ._vendor import openai from portkey_ai.api_resources import ( LLMOptions, Modes, @@ -143,4 +144,5 @@ "AsyncVectorStores", "AsyncVectorFiles", "AsyncVectorFileBatches", + "openai", ] diff --git a/portkey_ai/_vendor/__init__.py b/portkey_ai/_vendor/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/portkey_ai/_vendor/bin/openai b/portkey_ai/_vendor/bin/openai new file mode 100755 index 00000000..c10f3e4a --- /dev/null +++ b/portkey_ai/_vendor/bin/openai @@ -0,0 +1,8 @@ +#!/Users/chandeep/Documents/Workspace/Portkey/SDK/portkey-python-sdk/venv/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from openai.cli import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/portkey_ai/_vendor/openai-1.26.0.dist-info/INSTALLER b/portkey_ai/_vendor/openai-1.26.0.dist-info/INSTALLER new file mode 100644 index 00000000..a1b589e3 --- /dev/null +++ b/portkey_ai/_vendor/openai-1.26.0.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/portkey_ai/_vendor/openai-1.26.0.dist-info/METADATA b/portkey_ai/_vendor/openai-1.26.0.dist-info/METADATA new file mode 100644 index 00000000..2254fbe2 --- /dev/null +++ b/portkey_ai/_vendor/openai-1.26.0.dist-info/METADATA @@ -0,0 +1,677 @@ +Metadata-Version: 2.3 +Name: openai +Version: 1.26.0 +Summary: The official Python library for the openai API +Project-URL: Homepage, https://github.com/openai/openai-python +Project-URL: Repository, https://github.com/openai/openai-python +Author-email: OpenAI +License-Expression: Apache-2.0 +License-File: LICENSE +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: Apache Software License +Classifier: Operating System :: MacOS +Classifier: Operating System :: Microsoft :: Windows +Classifier: Operating System :: OS Independent +Classifier: Operating System :: POSIX +Classifier: Operating System :: POSIX :: Linux +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Typing :: Typed +Requires-Python: >=3.7.1 +Requires-Dist: anyio<5,>=3.5.0 +Requires-Dist: cached-property; python_version < '3.8' +Requires-Dist: 
distro<2,>=1.7.0 +Requires-Dist: httpx<1,>=0.23.0 +Requires-Dist: pydantic<3,>=1.9.0 +Requires-Dist: sniffio +Requires-Dist: tqdm>4 +Requires-Dist: typing-extensions<5,>=4.7 +Provides-Extra: datalib +Requires-Dist: numpy>=1; extra == 'datalib' +Requires-Dist: pandas-stubs>=1.1.0.11; extra == 'datalib' +Requires-Dist: pandas>=1.2.3; extra == 'datalib' +Description-Content-Type: text/markdown + +# OpenAI Python API library + +[![PyPI version](https://img.shields.io/pypi/v/openai.svg)](https://pypi.org/project/openai/) + +The OpenAI Python library provides convenient access to the OpenAI REST API from any Python 3.7+ +application. The library includes type definitions for all request params and response fields, +and offers both synchronous and asynchronous clients powered by [httpx](https://github.com/encode/httpx). + +It is generated from our [OpenAPI specification](https://github.com/openai/openai-openapi) with [Stainless](https://stainlessapi.com/). + +## Documentation + +The REST API documentation can be found [on platform.openai.com](https://platform.openai.com/docs). The full API of this library can be found in [api.md](https://github.com/openai/openai-python/tree/main/api.md). + +## Installation + +> [!IMPORTANT] +> The SDK was rewritten in v1, which was released November 6th 2023. See the [v1 migration guide](https://github.com/openai/openai-python/discussions/742), which includes scripts to automatically update your code. + +```sh +# install from PyPI +pip install openai +``` + +## Usage + +The full API of this library can be found in [api.md](https://github.com/openai/openai-python/tree/main/api.md). + +```python +import os +from openai import OpenAI + +client = OpenAI( + # This is the default and can be omitted + api_key=os.environ.get("OPENAI_API_KEY"), +) + +chat_completion = client.chat.completions.create( + messages=[ + { + "role": "user", + "content": "Say this is a test", + } + ], + model="gpt-3.5-turbo", +) +``` + +While you can provide an `api_key` keyword argument, +we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) +to add `OPENAI_API_KEY="My API Key"` to your `.env` file +so that your API Key is not stored in source control. + +### Polling Helpers + +When interacting with the API, some actions such as starting a Run and adding files to vector stores are asynchronous and take time to complete. The SDK includes +helper functions which will poll the status until it reaches a terminal state and then return the resulting object. +If an API method results in an action which could benefit from polling, there will be a corresponding version of the +method ending in '\_and_poll'. + +For instance, to create a Run and poll until it reaches a terminal state, you can run: + +```python +run = client.beta.threads.runs.create_and_poll( + thread_id=thread.id, + assistant_id=assistant.id, +) +``` + +More information on the lifecycle of a Run can be found in the [Run Lifecycle Documentation](https://platform.openai.com/docs/assistants/how-it-works/run-lifecycle). + +### Bulk Upload Helpers + +When creating and interacting with vector stores, you can use the polling helpers to monitor the status of operations. +For convenience, we also provide a bulk upload helper to allow you to upload several files at once. + +```python +from pathlib import Path + +sample_files = [Path("sample-paper.pdf"), ...]
+ +batch = await client.beta.vector_stores.file_batches.upload_and_poll( + store.id, + files=sample_files, +) +``` + +### Streaming Helpers + +The SDK also includes helpers to process streams and handle the incoming events. + +```python +with client.beta.threads.runs.stream( + thread_id=thread.id, + assistant_id=assistant.id, + instructions="Please address the user as Jane Doe. The user has a premium account.", +) as stream: + for event in stream: + # Print the text from text delta events + if event.type == "thread.message.delta" and event.data.delta.content: + print(event.data.delta.content[0].text) +``` + +More information on streaming helpers can be found in the dedicated documentation: [helpers.md](https://github.com/openai/openai-python/tree/main/helpers.md). + +## Async usage + +Simply import `AsyncOpenAI` instead of `OpenAI` and use `await` with each API call: + +```python +import os +import asyncio +from openai import AsyncOpenAI + +client = AsyncOpenAI( + # This is the default and can be omitted + api_key=os.environ.get("OPENAI_API_KEY"), +) + + +async def main() -> None: + chat_completion = await client.chat.completions.create( + messages=[ + { + "role": "user", + "content": "Say this is a test", + } + ], + model="gpt-3.5-turbo", + ) + + +asyncio.run(main()) +``` + +Functionality between the synchronous and asynchronous clients is otherwise identical. + +## Streaming responses + +We provide support for streaming responses using server-sent events (SSE). + +```python +from openai import OpenAI + +client = OpenAI() + +stream = client.chat.completions.create( + model="gpt-4", + messages=[{"role": "user", "content": "Say this is a test"}], + stream=True, +) +for chunk in stream: + print(chunk.choices[0].delta.content or "", end="") +``` + +The async client uses the exact same interface. + +```python +import asyncio +from openai import AsyncOpenAI + +client = AsyncOpenAI() + + +async def main(): + stream = await client.chat.completions.create( + model="gpt-4", + messages=[{"role": "user", "content": "Say this is a test"}], + stream=True, + ) + async for chunk in stream: + print(chunk.choices[0].delta.content or "", end="") + + +asyncio.run(main()) +``` + +## Module-level client + +> [!IMPORTANT] +> We highly recommend instantiating client instances instead of relying on the global client. + +We also expose a global client instance that is accessible in a similar fashion to versions prior to v1. + +```py +import openai + +# optional; defaults to `os.environ['OPENAI_API_KEY']` +openai.api_key = '...' + +# all client options can be configured just like the `OpenAI` instantiation counterpart +openai.base_url = "https://..." +openai.default_headers = {"x-foo": "true"} + +completion = openai.chat.completions.create( + model="gpt-4", + messages=[ + { + "role": "user", + "content": "How do I output all files in a directory using Python?", + }, + ], +) +print(completion.choices[0].message.content) +``` + +The API is exactly the same as the standard client-instance-based API. + +This is intended to be used within REPLs or notebooks for faster iteration, **not** in application code.
+ +We recommend that you always instantiate a client (e.g., with `client = OpenAI()`) in application code because: + +- It can be difficult to reason about where client options are configured +- It's not possible to change certain client options without potentially causing race conditions +- It's harder to mock for testing purposes +- It's not possible to control cleanup of network connections + +## Using types + +Nested request parameters are [TypedDicts](https://docs.python.org/3/library/typing.html#typing.TypedDict). Responses are [Pydantic models](https://docs.pydantic.dev) which also provide helper methods for things like: + +- Serializing back into JSON, `model.to_json()` +- Converting to a dictionary, `model.to_dict()` + +Typed requests and responses provide autocomplete and documentation within your editor. If you would like to see type errors in VS Code to help catch bugs earlier, set `python.analysis.typeCheckingMode` to `basic`. + +## Pagination + +List methods in the OpenAI API are paginated. + +This library provides auto-paginating iterators with each list response, so you do not have to request successive pages manually: + +```python +from openai import OpenAI + +client = OpenAI() + +all_jobs = [] +# Automatically fetches more pages as needed. +for job in client.fine_tuning.jobs.list( + limit=20, +): + # Do something with job here + all_jobs.append(job) +print(all_jobs) +``` + +Or, asynchronously: + +```python +import asyncio +from openai import AsyncOpenAI + +client = AsyncOpenAI() + + +async def main() -> None: + all_jobs = [] + # Iterate through items across all pages, issuing requests as needed. + async for job in client.fine_tuning.jobs.list( + limit=20, + ): + all_jobs.append(job) + print(all_jobs) + + +asyncio.run(main()) +``` + +Alternatively, you can use the `.has_next_page()`, `.next_page_info()`, or `.get_next_page()` methods for more granular control when working with pages: + +```python +first_page = await client.fine_tuning.jobs.list( + limit=20, +) +if first_page.has_next_page(): + print(f"will fetch next page using these details: {first_page.next_page_info()}") + next_page = await first_page.get_next_page() + print(f"number of items we just fetched: {len(next_page.data)}") + +# Remove `await` for non-async usage. +``` + +Or just work directly with the returned data: + +```python +first_page = await client.fine_tuning.jobs.list( + limit=20, +) + +print(f"next page cursor: {first_page.after}") # => "next page cursor: ..." +for job in first_page.data: + print(job.id) + +# Remove `await` for non-async usage. +``` + +## Nested params + +Nested parameters are dictionaries, typed using `TypedDict`, for example: + +```python +from openai import OpenAI + +client = OpenAI() + +completion = client.chat.completions.create( + messages=[ + { + "role": "user", + "content": "Can you generate an example json object describing a fruit?", + } + ], + model="gpt-3.5-turbo-1106", + response_format={"type": "json_object"}, +) +``` + +## File uploads + +Request parameters that correspond to file uploads can be passed as `bytes`, a [`PathLike`](https://docs.python.org/3/library/os.html#os.PathLike) instance or a tuple of `(filename, contents, media type)`. + +```python +from pathlib import Path +from openai import OpenAI + +client = OpenAI() + +client.files.create( + file=Path("input.jsonl"), + purpose="fine-tune", +) +``` + +The async client uses the exact same interface.
If you pass a [`PathLike`](https://docs.python.org/3/library/os.html#os.PathLike) instance, the file contents will be read asynchronously automatically. + +## Handling errors + +When the library is unable to connect to the API (for example, due to network connection problems or a timeout), a subclass of `openai.APIConnectionError` is raised. + +When the API returns a non-success status code (that is, 4xx or 5xx +response), a subclass of `openai.APIStatusError` is raised, containing `status_code` and `response` properties. + +All errors inherit from `openai.APIError`. + +```python +import openai +from openai import OpenAI + +client = OpenAI() + +try: + client.fine_tuning.jobs.create( + model="gpt-3.5-turbo", + training_file="file-abc123", + ) +except openai.APIConnectionError as e: + print("The server could not be reached") + print(e.__cause__) # an underlying Exception, likely raised within httpx. +except openai.RateLimitError as e: + print("A 429 status code was received; we should back off a bit.") +except openai.APIStatusError as e: + print("Another non-200-range status code was received") + print(e.status_code) + print(e.response) +``` + +Error codes are as follows: + +| Status Code | Error Type | +| ----------- | -------------------------- | +| 400 | `BadRequestError` | +| 401 | `AuthenticationError` | +| 403 | `PermissionDeniedError` | +| 404 | `NotFoundError` | +| 422 | `UnprocessableEntityError` | +| 429 | `RateLimitError` | +| >=500 | `InternalServerError` | +| N/A | `APIConnectionError` | + +### Retries + +Certain errors are automatically retried 2 times by default, with a short exponential backoff. +Connection errors (for example, due to a network connectivity problem), 408 Request Timeout, 409 Conflict, +429 Rate Limit, and >=500 Internal errors are all retried by default. + +You can use the `max_retries` option to configure or disable retry settings: + +```python +from openai import OpenAI + +# Configure the default for all requests: +client = OpenAI( + # default is 2 + max_retries=0, +) + +# Or, configure per-request: +client.with_options(max_retries=5).chat.completions.create( + messages=[ + { + "role": "user", + "content": "How can I get the name of the current day in Node.js?", + } + ], + model="gpt-3.5-turbo", +) +``` + +### Timeouts + +By default, requests time out after 10 minutes. You can configure this with a `timeout` option, +which accepts a float or an [`httpx.Timeout`](https://www.python-httpx.org/advanced/#fine-tuning-the-configuration) object: + +```python +import httpx +from openai import OpenAI + +# Configure the default for all requests: +client = OpenAI( + # 20 seconds (default is 10 minutes) + timeout=20.0, +) + +# More granular control: +client = OpenAI( + timeout=httpx.Timeout(60.0, read=5.0, write=10.0, connect=2.0), +) + +# Override per-request: +client.with_options(timeout=5.0).chat.completions.create( + messages=[ + { + "role": "user", + "content": "How can I list all files in a directory using Python?", + } + ], + model="gpt-3.5-turbo", +) +``` + +On timeout, an `APITimeoutError` is raised. + +Note that requests that time out are [retried twice by default](https://github.com/openai/openai-python/tree/main/#retries). + +## Advanced + +### Logging + +We use the standard library [`logging`](https://docs.python.org/3/library/logging.html) module. + +You can enable logging by setting the environment variable `OPENAI_LOG` to `debug`.
+ +```shell +$ export OPENAI_LOG=debug +``` + +### How to tell whether `None` means `null` or missing + +In an API response, a field may be explicitly `null`, or missing entirely; in either case, its value is `None` in this library. You can differentiate the two cases with `.model_fields_set`: + +```py +if response.my_field is None: + if 'my_field' not in response.model_fields_set: + print('Got json like {}, without a "my_field" key present at all.') + else: + print('Got json like {"my_field": null}.') +``` + +### Accessing raw response data (e.g. headers) + +The "raw" Response object can be accessed by prefixing `.with_raw_response.` to any HTTP method call, e.g., + +```py +from openai import OpenAI + +client = OpenAI() +response = client.chat.completions.with_raw_response.create( + messages=[{ + "role": "user", + "content": "Say this is a test", + }], + model="gpt-3.5-turbo", +) +print(response.headers.get('X-My-Header')) + +completion = response.parse() # get the object that `chat.completions.create()` would have returned +print(completion) +``` + +These methods return a [`LegacyAPIResponse`](https://github.com/openai/openai-python/tree/main/src/openai/_legacy_response.py) object. This is a legacy class, as we're changing it slightly in the next major version. + +For the sync client this will mostly be the same, with the exception +that `content` and `text` will be methods instead of properties. In the +async client, all methods will be async. + +A migration script will be provided, and the migration in general should +be smooth. + +#### `.with_streaming_response` + +The above interface eagerly reads the full response body when you make the request, which may not always be what you want. + +To stream the response body, use `.with_streaming_response` instead, which requires a context manager and only reads the response body once you call `.read()`, `.text()`, `.json()`, `.iter_bytes()`, `.iter_text()`, `.iter_lines()` or `.parse()`. In the async client, these are async methods. + +As such, `.with_streaming_response` methods return a different [`APIResponse`](https://github.com/openai/openai-python/tree/main/src/openai/_response.py) object, and the async client returns an [`AsyncAPIResponse`](https://github.com/openai/openai-python/tree/main/src/openai/_response.py) object. + +```python +with client.chat.completions.with_streaming_response.create( + messages=[ + { + "role": "user", + "content": "Say this is a test", + } + ], + model="gpt-3.5-turbo", +) as response: + print(response.headers.get("X-My-Header")) + + for line in response.iter_lines(): + print(line) +``` + +The context manager is required so that the response will reliably be closed. + +### Making custom/undocumented requests + +This library is typed for convenient access to the documented API. + +If you need to access undocumented endpoints, params, or response properties, the library can still be used. + +#### Undocumented endpoints + +To make requests to undocumented endpoints, you can make requests using `client.get`, `client.post`, and other +HTTP verbs. Options on the client (such as retries) will be respected when making this +request. + +```py +import httpx + +response = client.post( + "/foo", + cast_to=httpx.Response, + body={"my_param": True}, +) + +print(response.headers.get("x-foo")) +``` + +#### Undocumented request params + +If you want to explicitly send an extra param, you can do so with the `extra_query`, `extra_body`, and `extra_headers` request +options.
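+
+For example, here is a minimal sketch of sending extra values alongside a chat completion request; the header name and body field below are purely illustrative placeholders, not parameters this API defines:
+
+```python
+from openai import OpenAI
+
+client = OpenAI()
+
+completion = client.chat.completions.create(
+    messages=[{"role": "user", "content": "Say this is a test"}],
+    model="gpt-3.5-turbo",
+    # Merged into the outgoing request on top of the documented params:
+    extra_headers={"x-my-header": "true"},  # hypothetical header
+    extra_body={"my_extra_param": True},  # hypothetical body field
+)
+```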
+ +#### Undocumented response properties + +To access undocumented response properties, you can access the extra fields like `response.unknown_prop`. You +can also get all the extra fields on the Pydantic model as a dict with +[`response.model_extra`](https://docs.pydantic.dev/latest/api/base_model/#pydantic.BaseModel.model_extra). + +### Configuring the HTTP client + +You can directly override the [httpx client](https://www.python-httpx.org/api/#client) to customize it for your use case, including: + +- Support for proxies +- Custom transports +- Additional [advanced](https://www.python-httpx.org/advanced/#client-instances) functionality + +```python +import httpx +from openai import OpenAI, DefaultHttpxClient + +client = OpenAI( + # Or use the `OPENAI_BASE_URL` env var + base_url="http://my.test.server.example.com:8083", + http_client=DefaultHttpxClient( + proxies="http://my.test.proxy.example.com", + transport=httpx.HTTPTransport(local_address="0.0.0.0"), + ), +) +``` + +### Managing HTTP resources + +By default the library closes underlying HTTP connections whenever the client is [garbage collected](https://docs.python.org/3/reference/datamodel.html#object.__del__). You can manually close the client using the `.close()` method if desired, or with a context manager that closes when exiting. + +## Microsoft Azure OpenAI + +To use this library with [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/overview), use the `AzureOpenAI` +class instead of the `OpenAI` class. + +> [!IMPORTANT] +> The Azure API shape differs from the core API shape, which means that the static types for responses / params +> won't always be correct. + +```py +from openai import AzureOpenAI + +# gets the API Key from environment variable AZURE_OPENAI_API_KEY +client = AzureOpenAI( + # https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning + api_version="2023-07-01-preview", + # https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource + azure_endpoint="https://example-endpoint.openai.azure.com", +) + +completion = client.chat.completions.create( + model="deployment-name", # e.g. gpt-35-instant + messages=[ + { + "role": "user", + "content": "How do I output all files in a directory using Python?", + }, + ], +) +print(completion.to_json()) +``` + +In addition to the options provided in the base `OpenAI` client, the following options are provided: + +- `azure_endpoint` (or the `AZURE_OPENAI_ENDPOINT` environment variable) +- `azure_deployment` +- `api_version` (or the `OPENAI_API_VERSION` environment variable) +- `azure_ad_token` (or the `AZURE_OPENAI_AD_TOKEN` environment variable) +- `azure_ad_token_provider` + +An example of using the client with Azure Active Directory can be found [here](https://github.com/openai/openai-python/blob/main/examples/azure_ad.py). + +## Versioning + +This package generally follows [SemVer](https://semver.org/spec/v2.0.0.html) conventions, though certain backwards-incompatible changes may be released as minor versions: + +1. Changes that only affect static types, without breaking runtime behavior. +2. Changes to library internals which are technically public but not intended or documented for external use. _(Please open a GitHub issue to let us know if you are relying on such internals)_. +3. Changes that we do not expect to impact the vast majority of users in practice.
+ +We take backwards-compatibility seriously and work hard to ensure you can rely on a smooth upgrade experience. + +We are keen for your feedback; please open an [issue](https://www.github.com/openai/openai-python/issues) with questions, bugs, or suggestions. + +## Requirements + +Python 3.7 or higher. diff --git a/portkey_ai/_vendor/openai-1.26.0.dist-info/RECORD b/portkey_ai/_vendor/openai-1.26.0.dist-info/RECORD new file mode 100644 index 00000000..86bbc7aa --- /dev/null +++ b/portkey_ai/_vendor/openai-1.26.0.dist-info/RECORD @@ -0,0 +1,530 @@ +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/__main__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_base_client.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_client.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_compat.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_constants.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_exceptions.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_extras/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_extras/_common.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_extras/numpy_proxy.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_extras/pandas_proxy.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_files.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_legacy_response.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_models.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_module_client.cpython-39.pyc,, 
+../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_qs.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_resource.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_response.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_streaming.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_types.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_utils/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_utils/_logs.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_utils/_proxy.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_utils/_streams.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_utils/_sync.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_utils/_transform.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_utils/_typing.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_utils/_utils.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/_version.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/cli/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/cli/_api/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/cli/_api/_main.cpython-39.pyc,, 
+../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/cli/_api/audio.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/cli/_api/chat/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/cli/_api/chat/completions.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/cli/_api/completions.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/cli/_api/files.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/cli/_api/image.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/cli/_api/models.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/cli/_cli.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/cli/_errors.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/cli/_models.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/cli/_progress.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/cli/_tools/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/cli/_tools/_main.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/cli/_tools/fine_tunes.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/cli/_tools/migrate.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/cli/_utils.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/lib/_old_api.cpython-39.pyc,, 
+../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/lib/_validators.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/lib/azure.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/lib/streaming/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/lib/streaming/_assistants.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/pagination.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/audio/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/audio/audio.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/audio/speech.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/audio/transcriptions.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/audio/translations.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/batches.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/beta/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/beta/assistants.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/beta/beta.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/beta/threads/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/beta/threads/messages.cpython-39.pyc,, 
+../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/beta/threads/runs/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/beta/threads/runs/runs.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/beta/threads/runs/steps.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/beta/threads/threads.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/beta/vector_stores/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/beta/vector_stores/file_batches.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/beta/vector_stores/files.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/beta/vector_stores/vector_stores.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/chat/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/chat/chat.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/chat/completions.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/completions.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/embeddings.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/files.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/fine_tuning/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/fine_tuning/fine_tuning.cpython-39.pyc,, 
+../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/fine_tuning/jobs/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/fine_tuning/jobs/checkpoints.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/fine_tuning/jobs/jobs.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/images.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/models.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/resources/moderations.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/audio/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/audio/speech_create_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/audio/transcription.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/audio/transcription_create_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/audio/translation.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/audio/translation_create_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/batch.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/batch_create_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/batch_error.cpython-39.pyc,, 
+../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/batch_list_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/batch_request_counts.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/assistant.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/assistant_create_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/assistant_deleted.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/assistant_list_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/assistant_response_format.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/assistant_response_format_option.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/assistant_response_format_option_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/assistant_response_format_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/assistant_stream_event.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/assistant_tool.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/assistant_tool_choice.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/assistant_tool_choice_function.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/assistant_tool_choice_function_param.cpython-39.pyc,, 
+../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/assistant_tool_choice_option.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/assistant_tool_choice_option_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/assistant_tool_choice_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/assistant_tool_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/assistant_update_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/chat/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/code_interpreter_tool.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/code_interpreter_tool_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/file_search_tool.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/file_search_tool_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/function_tool.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/function_tool_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/thread.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/thread_create_and_run_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/thread_create_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/thread_deleted.cpython-39.pyc,, 
+../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/thread_update_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/annotation.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/annotation_delta.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/file_citation_annotation.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/file_citation_delta_annotation.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/file_path_annotation.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/file_path_delta_annotation.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/image_file.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/image_file_content_block.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/image_file_delta.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/image_file_delta_block.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/message.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/message_content.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/message_content_delta.cpython-39.pyc,, 
+../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/message_create_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/message_deleted.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/message_delta.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/message_delta_event.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/message_list_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/message_update_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/required_action_function_tool_call.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/run.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/run_create_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/run_list_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/run_status.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/run_submit_tool_outputs_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/run_update_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/runs/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/runs/code_interpreter_logs.cpython-39.pyc,, 
+../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/runs/code_interpreter_output_image.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/runs/code_interpreter_tool_call.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/runs/file_search_tool_call.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/runs/file_search_tool_call_delta.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/runs/function_tool_call.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/runs/function_tool_call_delta.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/runs/message_creation_step_details.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/runs/run_step.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/runs/run_step_delta.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/runs/run_step_delta_event.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/runs/run_step_delta_message_delta.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/runs/step_list_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/runs/tool_call.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/runs/tool_call_delta.cpython-39.pyc,, 
+../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/runs/tool_call_delta_object.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/runs/tool_calls_step_details.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/text.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/text_content_block.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/text_delta.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/threads/text_delta_block.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/vector_store.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/vector_store_create_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/vector_store_deleted.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/vector_store_list_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/vector_store_update_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/vector_stores/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/vector_stores/file_batch_create_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/vector_stores/file_batch_list_files_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/vector_stores/file_create_params.cpython-39.pyc,, 
+../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/vector_stores/file_list_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/vector_stores/vector_store_file.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/vector_stores/vector_store_file_batch.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/beta/vector_stores/vector_store_file_deleted.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion_assistant_message_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion_chunk.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion_content_part_image_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion_content_part_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion_content_part_text_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion_function_call_option_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion_function_message_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion_message.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion_message_param.cpython-39.pyc,, 
+../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion_message_tool_call.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion_message_tool_call_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion_named_tool_choice_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion_role.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion_stream_options_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion_system_message_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion_token_logprob.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion_tool_choice_option_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion_tool_message_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion_tool_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/chat_completion_user_message_param.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat/completion_create_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/chat_model.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/completion.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/completion_choice.cpython-39.pyc,, 
+../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/completion_create_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/completion_usage.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/create_embedding_response.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/embedding.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/embedding_create_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/file_content.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/file_create_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/file_deleted.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/file_list_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/file_object.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/fine_tuning/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/fine_tuning/fine_tuning_job.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/fine_tuning/fine_tuning_job_event.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/fine_tuning/fine_tuning_job_integration.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/fine_tuning/fine_tuning_job_wandb_integration.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.cpython-39.pyc,, 
+../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/fine_tuning/job_create_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/fine_tuning/job_list_events_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/fine_tuning/job_list_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/fine_tuning/jobs/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/fine_tuning/jobs/checkpoint_list_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/image.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/image_create_variation_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/image_edit_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/image_generate_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/images_response.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/model.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/model_deleted.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/moderation.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/moderation_create_params.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/moderation_create_response.cpython-39.pyc,, 
+../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/shared/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/shared/error_object.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/shared/function_definition.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/shared/function_parameters.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/shared_params/__init__.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/shared_params/function_definition.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/types/shared_params/function_parameters.cpython-39.pyc,, +../../../../../../../../../Users/chandeep/Library/Caches/com.apple.python/private/var/folders/9_/nx5thgw547s1zlwxzhbjtb1m0000gn/T/pip-target-qt7h94x6/lib/python/openai/version.cpython-39.pyc,, +../../bin/openai,sha256=hl2E5BbKWVfkczcW65l8G1zyeJ3Si5m9TUnp5aG8gtY,276 +openai-1.26.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +openai-1.26.0.dist-info/METADATA,sha256=aqvy9hK9MmteG_6rBZeNpdB9KZfDV93HyipQqq44q88,21941 +openai-1.26.0.dist-info/RECORD,, +openai-1.26.0.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +openai-1.26.0.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87 +openai-1.26.0.dist-info/entry_points.txt,sha256=kAYhQEmziJwsKs5raYAIOvJ2LWmbz5dulEXOzsY71ro,43 +openai-1.26.0.dist-info/licenses/LICENSE,sha256=d0M6HDjQ76tf255XPlAGkIoECMe688MXcGEYsOFySfI,11336 +openai/__init__.py,sha256=hTM-EsfeafKBLu-n5AVSQVDB2MMBGnZoLtATFeW-OL0,10007 +openai/__main__.py,sha256=bYt9eEaoRQWdejEHFD8REx9jxVEdZptECFsV7F49Ink,30 +openai/_base_client.py,sha256=EvBV2cnkgHo9YD0BzCmKiZFVtioTnFusMVv3btr9J30,65358 +openai/_client.py,sha256=BIWse5bWvbEIyGNdGqM5RjjP6zD0oFvFKFtqbIO-xf4,21751 +openai/_compat.py,sha256=m0I0haqFZuVxd5m227_8nNmvA1saXyuNJ7BjidX_PTE,6389 +openai/_constants.py,sha256=L1pfEhuz_wM2w2_U9P_9JZzTbrN4pbLo207l96rtKcQ,469 +openai/_exceptions.py,sha256=IXzw429JsoOD9PbpqLWvncvuOuqU_GOZ1z9D494BZxU,3892 +openai/_extras/__init__.py,sha256=LZbJLZ7aFHRcI7uiY4-wFQTdMp-BF6FER1QMhKVFkWk,107 +openai/_extras/_common.py,sha256=NWWtgbdJsO3hQGQxaXGfVk0LjeIE5AFZ8VS_795hhMc,364 +openai/_extras/numpy_proxy.py,sha256=hwZXa_JBAPD5taRhor1tGxK26g5IaK52JclQDl-dky0,799 +openai/_extras/pandas_proxy.py,sha256=NCEt1Dqwc_0H85YdsWPDE3lPDJtYnBT8G-gJE_BCeEc,637 +openai/_files.py,sha256=O4WNhHahzd5ZRe4F69WlBJegBpQM3O9YGeXWNkz972Y,3632 +openai/_legacy_response.py,sha256=GLrqADb4ed3N5hffQJpU2nSZQ85OVLODB4JVcWFA4u4,15575 +openai/_models.py,sha256=uP4bh5E_BfBqh_uT0YwKoDMmxcIG0IWwnbtwtItnhO0,26096 
+openai/_module_client.py,sha256=gF_2bbdosIwUt29sQgrQRJOgNREvXF-IDxe4XKGhHjY,2523 +openai/_qs.py,sha256=AOkSz4rHtK4YI3ZU_kzea-zpwBUgEY8WniGmTPyEimc,4846 +openai/_resource.py,sha256=IQihFzFLhGOiGSlT2dO1ESWSTg2XypgbtAldtGdTOqU,1100 +openai/_response.py,sha256=FhY-5uevGc0KRDmI0eH5n1g4ok-t4lcNq8aDnM-DWqE,28873 +openai/_streaming.py,sha256=t1UZrg53fVJB5Rs6k2sT9PBbvjp-IGrQzUq_5nlxKG4,13102 +openai/_types.py,sha256=sZvy7fSCEWzjt1Fw9gqYHLJ78q9eces6pzMYAbPSyHQ,6226 +openai/_utils/__init__.py,sha256=NqFXgdc-_0_h2jOO8BrNcNupWHYfa91i6DVDtxSr4Y4,1847 +openai/_utils/_logs.py,sha256=sFA_NejuNObTGGbfsXC03I38mrT9HjsgAJx4d3GP0ok,774 +openai/_utils/_proxy.py,sha256=DjcB-BBIRagSbMut2pF_jZavjda9sPvmQCKtVXBhs0I,1910 +openai/_utils/_streams.py,sha256=SMC90diFFecpEg_zgDRVbdR3hSEIgVVij4taD-noMLM,289 +openai/_utils/_sync.py,sha256=8zEEYfir8iCUcAMFtWd8cDi8NVEaZonc4sfLAYr16io,2269 +openai/_utils/_transform.py,sha256=NCz3q9_O-vuj60xVe-qzhEQ8uJWlZWJTsM-GwHDccf8,12958 +openai/_utils/_typing.py,sha256=tFbktdpdHCQliwzGsWysgn0P5H0JRdagkZdb_LegGkY,3838 +openai/_utils/_utils.py,sha256=1_mm0IcPWDckpwQrb5chWTqeG7JWst_ycXaoFUTXbzE,11497 +openai/_version.py,sha256=KeBePgCOIa22aNZ47LkW77hHmpTdpzVj1d2MCRSCEwY,159 +openai/cli/__init__.py,sha256=soGgtqyomgddl92H0KJRqHqGuaXIaghq86qkzLuVp7U,31 +openai/cli/_api/__init__.py,sha256=cj92MZq-9_1PQM8A4TQVsqKn5mcTDAGxHllJ0UvJOPE,58 +openai/cli/_api/_main.py,sha256=5yyfLURqCEaAN8B61gHaqVAaYgtyb9Xq0ncQ3P2BAh0,451 +openai/cli/_api/audio.py,sha256=HZDTRZT-qZTMsg7WOm-djCQlf874aSa3lxRvNG27wLM,3347 +openai/cli/_api/chat/__init__.py,sha256=MhFUQH9F6QCtbPMlbsU_DWTd7wc5DSCZ7Wy3FBGVij0,300 +openai/cli/_api/chat/completions.py,sha256=9Ztetyz7rm0gP5SOPWEcpzFJnJKuIEQit626vOq42bE,5363 +openai/cli/_api/completions.py,sha256=ysOmnbXpFz3VB5N_5USPdObiYew62vEn6rMtNFwTJGQ,6412 +openai/cli/_api/files.py,sha256=6nKXFnsC2QE0bGnVUAG7BTLSu6K1_MhPE0ZJACmzgRY,2345 +openai/cli/_api/image.py,sha256=ovBExdn8oUK9ImOpsPafesfAlmcftLP2p7d37hcUtKU,5062 +openai/cli/_api/models.py,sha256=pGmIGZToj3raGGpKvPSq_EVUR-dqg4Vi0PNfZH98D2E,1295 +openai/cli/_cli.py,sha256=WxqTnhVVtfzX0z7hV5fcvd3hkihaUgwOWpXOwyCS4Fc,6743 +openai/cli/_errors.py,sha256=7BYF2Kp_L6yKsZDNdg-gK71FMVCNjhrunfVVgh4Zy0M,479 +openai/cli/_models.py,sha256=tgsldjG216KpwgAZ5pS0sV02FQvONDJU2ElA4kCCiIU,491 +openai/cli/_progress.py,sha256=aMLssU9jh-LoqRYH3608jNos7r6vZKnHTRlHxFznzv4,1406 +openai/cli/_tools/__init__.py,sha256=cj92MZq-9_1PQM8A4TQVsqKn5mcTDAGxHllJ0UvJOPE,58 +openai/cli/_tools/_main.py,sha256=pakjEXHRHqYlTml-RxV7fNrRtRXzmZBinoPi1AJipFY,467 +openai/cli/_tools/fine_tunes.py,sha256=RQgYMzifk6S7Y1I1K6huqco2QxmXa7gVUlHl6SrKTSU,1543 +openai/cli/_tools/migrate.py,sha256=NAYWN90bkhOa_AeABjEt3uOZC20HQ0gA2MNBuMrz7fM,4910 +openai/cli/_utils.py,sha256=oiTc9MnxQh_zxAZ1OIHPkoDpCll0NF9ZgkdFHz4T-Bs,848 +openai/lib/.keep,sha256=wuNrz-5SXo3jJaJOJgz4vFHM41YH_g20F5cRQo0vLes,224 +openai/lib/_old_api.py,sha256=XZnXBrEKuTd70iJirj5mGW35fZoqruJobbBTq6bvg10,1947 +openai/lib/_validators.py,sha256=cXJXFuaAl7jeJcYHXXnFa4NHGtHs-_zt3Zs1VVCmQo4,35288 +openai/lib/azure.py,sha256=9fyl1ZIx3QV8j4F7B5nrE1mqPQ-gpdn8sQGr9jBvuUc,21479 +openai/lib/streaming/__init__.py,sha256=kD3LpjsqU7caDQDhB-YjTUl9qqbb5sPnGGSI2yQYC70,379 +openai/lib/streaming/_assistants.py,sha256=-gU50bd7FsvvO5Sp756dQ66wsSNyemzNIlJwpxitOhM,40467 +openai/pagination.py,sha256=B9ejXEAR_hYGLHfqb9xEEsE0u5dCUMjvplOce5dpY7M,2760 +openai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +openai/resources/__init__.py,sha256=odhKOSLopY06Kz2fJy9oafb2xViXgkLRJ2vn0Kc7qJA,4166 
+openai/resources/audio/__init__.py,sha256=YM7FHvPKVlj_v6EIgfpUQsb6q4hS2hVQ3gfkgic0sP0,1687 +openai/resources/audio/audio.py,sha256=1HHcDRWT58KshYelRdSnJs-0bvMBRS1vOhnU-h_oP5s,4481 +openai/resources/audio/speech.py,sha256=A4_SwpCesEfHg89cxazNdrHz8JxNvUp5LlLNoMqo-0w,7876 +openai/resources/audio/transcriptions.py,sha256=bBdQZXzjamZIbe5R_Ji9JJ6W9nJCNN7EwQVinu572Pk,11128 +openai/resources/audio/translations.py,sha256=_NoBAOXYqMEtjeUhdoHF3DNb-UqnhqVrmfqgITvhajI,9070 +openai/resources/batches.py,sha256=HpMvKfSgC3F5ea8ZlmvvnJ5A0tkpzjMJkAioo4vk0Cs,17614 +openai/resources/beta/__init__.py,sha256=nXoV4P8WCrbEZuNMtptbIuy_LqlVafY9lJ2qfW35GFc,1636 +openai/resources/beta/assistants.py,sha256=cUwAeYr-JfdWE6sehSve3n-YaqOJEAFpIimjbwp9sqg,39350 +openai/resources/beta/beta.py,sha256=xw_dfi9ZpyRG4ChwweQtirWwsWxhAA4mXSV46D7pS5M,4485 +openai/resources/beta/threads/__init__.py,sha256=fQ_qdUVSfouVS5h47DlTb5mamChT4K-v-siPuuAB6do,1177 +openai/resources/beta/threads/messages.py,sha256=CBUP4HGGWRwp4nf3NwI9QdDOWYxHG1M-lqVJLYnyaRI,26157 +openai/resources/beta/threads/runs/__init__.py,sha256=2FfDaqwmJJCd-IVpY_CrzWcFvw0KFyQ3cm5jnTfI-DQ,771 +openai/resources/beta/threads/runs/runs.py,sha256=ihJYuUGzXK6fPdcEJpUIy7cWPDWzDIjuKPUQW4aXWdo,147289 +openai/resources/beta/threads/runs/steps.py,sha256=uRykb4JapSNZCF8OD54f5qOWtrp2GoU1k5uAZgA4kAk,12223 +openai/resources/beta/threads/threads.py,sha256=v4q822rU6vUelDK4GWuj74UGwbtcsB-xFIek5UOSd7g,99552 +openai/resources/beta/vector_stores/__init__.py,sha256=11Xn1vhgndWiI0defJHv31vmbtbDgh2GwZT3gX8GgHk,1296 +openai/resources/beta/vector_stores/file_batches.py,sha256=7lM7BSh6iLm34InyB_kvsxlJCWS-GnpUT7T5e0qllM8,29533 +openai/resources/beta/vector_stores/files.py,sha256=5yN7RmmRMmrTXpkORSllgfuPD8FoYpLHeSbUJEsqT7Q,27046 +openai/resources/beta/vector_stores/vector_stores.py,sha256=dlewvG7YwtCqXEXqaZuxU_xJEpViX2B-yN1WWaGGBKM,27141 +openai/resources/chat/__init__.py,sha256=8Q9ODRo1wIpFa34VaNwuaWFmxqFxagDtUhIAkQNvxEU,849 +openai/resources/chat/chat.py,sha256=Edexhbq1anfSS_I0wNRQb7rx1OV6-rq4sxgVlYDGb6Y,2342 +openai/resources/chat/completions.py,sha256=uMtKJiYRRIZ8o2MFwNTB2Kq4Tgt0KBDP2LP2B6uyyTQ,68761 +openai/resources/completions.py,sha256=4Rfv9o3XwI5GRfhN1RD4tEgNn0I2jb6TRW6j0b6bpZc,58712 +openai/resources/embeddings.py,sha256=cMSXtMc_7mBqlSiQ99B7qXYoRLGyoeIFazyYQ0jJ1O4,10755 +openai/resources/files.py,sha256=kXRhg8gjvjoksdJ47Y2VeKQ7Dlee09znexrGPFGR9bI,26152 +openai/resources/fine_tuning/__init__.py,sha256=s6uoq7gM4gwoywdOOZQkPeYiSbUl-OwpeuMhwJJk0lc,837 +openai/resources/fine_tuning/fine_tuning.py,sha256=-2k4d5ZDlCIoqonSOMtGLVl4Kk9n2yJoKvVMG3PoWW8,2410 +openai/resources/fine_tuning/jobs/__init__.py,sha256=_smlrwijZOCcsDWqKnofLxQM2QLucZzXgboL9zJBPHw,849 +openai/resources/fine_tuning/jobs/checkpoints.py,sha256=6uP1CCGkzE_n8FsVdTQ36eH_eiq24wOxQQ5zzOy0UEU,6456 +openai/resources/fine_tuning/jobs/jobs.py,sha256=MDluaeAYVfX0ky5Q8Nxy0Gx2DT05lXSBG8iDap53zds,26850 +openai/resources/images.py,sha256=vtVb0k94YWemgqwxq9XHDZazaxLm3S7PQDftzsXBlKk,24796 +openai/resources/models.py,sha256=XF3E56V62YZq-HrStUDDvfrT2RHj98P8Y-oOrPSPRX0,10222 +openai/resources/moderations.py,sha256=WPMrXyYXxFXHFyyF_xzg_1Uj3Xtb3KbxsnJm3SQYgcA,6685 +openai/types/__init__.py,sha256=KWciGl8_OEKoOmVP3_wHo26Qs3FMwRUl91P8CZXcHZY,2051 +openai/types/audio/__init__.py,sha256=slwR2gZwYMmTpPihbr1a2rryQuyfqeAGzgjluQwlmN4,494 +openai/types/audio/speech_create_params.py,sha256=uae8hceXzm75E3QXBC9dRMunYA2Mj2m7lUiG_fbuN70,1278 +openai/types/audio/transcription.py,sha256=jP13KGV0ZSgK3FkIZueDLrH4Yhafp5FkXBEP85deBAo,231 
+openai/types/audio/transcription_create_params.py,sha256=H7LOzb4VHwhF_cm0MXMIDgfglmbu-T-gcrp1i2HJBqI,2226 +openai/types/audio/translation.py,sha256=_PhTtQ-s1yc-4kAKlgc88FTqUpXnNYfM2ld5IuRRGkA,195 +openai/types/audio/translation_create_params.py,sha256=pynqbAozfcVwu1U6C6xvauZSFlQxIz1cswSXJLfRI30,1506 +openai/types/batch.py,sha256=eIOIaJnDuv93fdefTI0WRfTm7MZH8gLBdF0B12JCiZw,2787 +openai/types/batch_create_params.py,sha256=Kh4ZGVNBFpO3mHakKNSktaUPc-cLpBrlh9RqyLjsnqk,1183 +openai/types/batch_error.py,sha256=Xxl-gYm0jerpYyI-mKSSVxRMQRubkoLUiOP9U3v72EM,622 +openai/types/batch_list_params.py,sha256=X1_sfRspuIMSDyXWVh0YnJ9vJLeOOH66TrvgEHueC84,705 +openai/types/batch_request_counts.py,sha256=nOzdL84OlZRycVNW99EDkdjCFqqKh68emaWT4Lx7dBE,410 +openai/types/beta/__init__.py,sha256=z2VmuulluQs5lVF22u2-FvbTQLpVhtz6hEcM1iUAXZc,2919 +openai/types/beta/assistant.py,sha256=9lrwz2SdGMf553qzYltklaVSKtdQIfR7WKBFJgUr_cg,4615 +openai/types/beta/assistant_create_params.py,sha256=bgrU6XrpJEkKF2v3gSkLQYYv-uHPXcNk4H01Ngrtzwg,6059 +openai/types/beta/assistant_deleted.py,sha256=bTTUl5FPHTBI5nRm7d0sGuR9VCSBDZ-IbOn9G_IpmJQ,301 +openai/types/beta/assistant_list_params.py,sha256=1-osjSX8tKieHSP0xaKBBU8j-J01fKrrxIJRHDudFHk,1220 +openai/types/beta/assistant_response_format.py,sha256=-JYxEihoHEHMak9E7KiyD5Zh_f3c-155j110mBDTFNE,378 +openai/types/beta/assistant_response_format_option.py,sha256=pDRz-lm-ASYhVIslXCulGAtO0c9Ulr6zVz-VltQQOh4,348 +openai/types/beta/assistant_response_format_option_param.py,sha256=JSH4wXdfgQBLMUagfVCn3clk9eErAUAiyZSQZ2XM-2w,410 +openai/types/beta/assistant_response_format_param.py,sha256=qtkwEg3hG3_ewmHH3E1hXsQqVqyMSTIOGFN9R1WTW0g,369 +openai/types/beta/assistant_stream_event.py,sha256=JprbttORwq5mJUpyziwCHH7vXBzuSqU-MbfNHWpeTEw,6529 +openai/types/beta/assistant_tool.py,sha256=ci9elhBtBQY3_0FefsDuKxyLLRrl5m9e_PSvShZqTSo,478 +openai/types/beta/assistant_tool_choice.py,sha256=Hy4HIfPQCkWD8VruHHicuTkomNwljGHviQHk36prKhg,544 +openai/types/beta/assistant_tool_choice_function.py,sha256=lMEPJrd2nIeNeTFTRKj8OTJmS--Zvu6kmzqjFR_iBlQ,271 +openai/types/beta/assistant_tool_choice_function_param.py,sha256=-O38277LhSaqOVhTp0haHP0ZnVTLpEBvcLJa5MRo7wE,355 +openai/types/beta/assistant_tool_choice_option.py,sha256=WaLj1FSgQyLrss5hoKbmb19C0hzD5_WP3bWgzNdZIMM,340 +openai/types/beta/assistant_tool_choice_option_param.py,sha256=ODCix7ElFxtyABiL09OhaYbQy9RjICCSmILeqBFWeLE,402 +openai/types/beta/assistant_tool_choice_param.py,sha256=NOWx9SzZEwYaHeAyFZTQlG3pmogMNXzjPJDGQUlbv7Q,572 +openai/types/beta/assistant_tool_param.py,sha256=xsB-Vq93uyS69m5zMoAc7keLXB_OSwEUH6XgB2g3ex4,450 +openai/types/beta/assistant_update_params.py,sha256=8YGYglHCQhoBCleaaKsDmR13LijeDgrhIhQ5Lo8B1L0,4363 +openai/types/beta/chat/__init__.py,sha256=OKfJYcKb4NObdiRObqJV_dOyDQ8feXekDUge2o_4pXQ,122 +openai/types/beta/code_interpreter_tool.py,sha256=7mgQc9OtD_ZUnZeNhoobMFcmmvtZPFCNYGB-PEnNnfs,333 +openai/types/beta/code_interpreter_tool_param.py,sha256=X6mwzFyZx1RCKEYbBCPs4kh_tZkxFxydPMK4yFNJkLs,389 +openai/types/beta/file_search_tool.py,sha256=u7Dw4G956UhhSF6zUvMU2Pyt9px3QEz8cno9Au_DofQ,313 +openai/types/beta/file_search_tool_param.py,sha256=nAON5EUoano9jVPYZMzMYMLCxde_43NBgtooPFpZcyU,369 +openai/types/beta/function_tool.py,sha256=oYGJfcfPpUohKw2ikgshDjOI1HXCK-5pAWyegYNezeU,397 +openai/types/beta/function_tool_param.py,sha256=T_k2OX1OULgkrHHXw0rY_J-O0y5qA0lM-B58C64YyfM,453 +openai/types/beta/thread.py,sha256=wd00j3ogUpOa_O0Sf1m6H4f8t1Nf05DKWiK_4m33O6s,2013 
+openai/types/beta/thread_create_and_run_params.py,sha256=i-WBCNm1aCUKn-TC5xvv4p1W0DtyEPKvjoEq_ang9Ks,12517 +openai/types/beta/thread_create_params.py,sha256=A1JaofbmpOpA7gs8F-Olj5MS7ZwHD3xEIH_GQrWDD80,4389 +openai/types/beta/thread_deleted.py,sha256=MaYG_jZIjSiB9h_ZBiTtpMsRSwFKkCY83ziM5GO_oUk,292 +openai/types/beta/thread_update_params.py,sha256=RYsR88YHwReKLiLqnLlnWiReiVIGlEGvVV9-g_wptgM,1750 +openai/types/beta/threads/__init__.py,sha256=dneukcPQuYkP0N7UTmrbUXpjCl_9Uv1MpWJuAJCTE-A,2156 +openai/types/beta/threads/annotation.py,sha256=3VHiyDhcR2G-cQ48_itBsXDWlmfpUJ7rnjeMh_DsHgg,440 +openai/types/beta/threads/annotation_delta.py,sha256=aJ1A_paDRkRVivuCZrmOL4QRvVW3KmZxsGUgOJ7uzUU,488 +openai/types/beta/threads/file_citation_annotation.py,sha256=0-0cZw65Xp3Wuq3zcaeK-we2jUchZX5dsxriIpXYH38,653 +openai/types/beta/threads/file_citation_delta_annotation.py,sha256=R87tcXkJ0RiH5UJo0Qknwk7X_c4qF1qvGsu2spOPx-I,873 +openai/types/beta/threads/file_path_annotation.py,sha256=hNc4ebprJynqMG1yk0gLvgzTpjtVzgEbXriMZftkgew,552 +openai/types/beta/threads/file_path_delta_annotation.py,sha256=RW9dgDF9Ggf357fPZ-vUu2ge3U-Hf11DVTr-ecklsBY,755 +openai/types/beta/threads/image_file.py,sha256=dLOJ9_oTVoJllJqS96NfLQQ7i0To0up25TLl5OdXRt4,324 +openai/types/beta/threads/image_file_content_block.py,sha256=31I5trSERP2qLZpJ4ugZtIyta4DDoBhBvxkM4LovL3w,363 +openai/types/beta/threads/image_file_delta.py,sha256=RXiEnWIItjoRKiwecJ0LLWzils8zNvb80R_j1ZWcqQM,378 +openai/types/beta/threads/image_file_delta_block.py,sha256=XJ2YVX_cq0OiNcGbNmXO0_dca1IvPockOvvoM7pDvbI,492 +openai/types/beta/threads/message.py,sha256=MgO0oD_sXfGpTPhsLDCgg_bXeNeGAyUGYaWlsbMN9rc,3056 +openai/types/beta/threads/message_content.py,sha256=iAQm3X-YXbbkLpob_S3J4PnqTEdN_V_qfZAR-yolpTY,440 +openai/types/beta/threads/message_content_delta.py,sha256=9OiciDh1vCUT6r0q2ta-QTlORr5ESALZUNO0BYOeQns,438 +openai/types/beta/threads/message_create_params.py,sha256=KoP-6M3riWV5n3oEbmBBoU2v9prx7CGr_hXk0jF-fGo,1689 +openai/types/beta/threads/message_deleted.py,sha256=DNnrSfGZ3kWEazmo4mVTdLhiKlIHxs-D8Ef5sNdHY1o,303 +openai/types/beta/threads/message_delta.py,sha256=-kaRyvnIA8Yr2QV5jKRn15BU2Ni068a_WtWJ4PqlLfE,570 +openai/types/beta/threads/message_delta_event.py,sha256=7SpE4Dd3Lrc_cm97SzBwZzGGhfLqiFViDeTRQz-5YmQ,579 +openai/types/beta/threads/message_list_params.py,sha256=LXqc3deSkKO6VN337OlQ4fzG7dfgBE7Iv_CLzZHhbhw,1294 +openai/types/beta/threads/message_update_params.py,sha256=bw6_U-vZA4c9_CDmeGOh7IEPIm8BU3BBOKtxnii0LKA,629 +openai/types/beta/threads/required_action_function_tool_call.py,sha256=XsR4OBbxI-RWteLvhcLEDBan6eUUGvhLORFRKjPbsLg,888 +openai/types/beta/threads/run.py,sha256=D6TDDeIGMS39jc2TVY4HrVw0mpBDXhro9VIzeH2ejdg,7656 +openai/types/beta/threads/run_create_params.py,sha256=b37YY0_xLHNJjDJeKZoj38yJ-iGNrZFH1JcrkqDPqWI,8868 +openai/types/beta/threads/run_list_params.py,sha256=73poqeRcb5TEsIVn7OzJ_g9OajNokEzpCVLzVNKZmPk,1208 +openai/types/beta/threads/run_status.py,sha256=6KPJB7l0YfGSKzx4wuIP8SDiZSiaD2nb0KOf0uRPDP4,282 +openai/types/beta/threads/run_submit_tool_outputs_params.py,sha256=aDrg0FZZoJKaPVQzcFjUg4ZKaeW8KF6UJBxhJEIjC2I,1630 +openai/types/beta/threads/run_update_params.py,sha256=76dWMNa3zCUliemCdwWv6p07GNeMYCdZoJs9KNbdZSE,621 +openai/types/beta/threads/runs/__init__.py,sha256=uhxk5F1_5c5wg2_p70AjlOy9cE3Ga8-ILn4Ep-gcls4,1515 +openai/types/beta/threads/runs/code_interpreter_logs.py,sha256=7wXZpUE9I-oZJ0K3mFG0Nwmfm2bKGiSpWJyBeo7txwo,482 +openai/types/beta/threads/runs/code_interpreter_output_image.py,sha256=8o99k0ZHMHpqH0taXkOkYR9WaDUpCN-G0Ifd5XsJpb8,613 
+openai/types/beta/threads/runs/code_interpreter_tool_call.py,sha256=Ydsi3ob7fyv1MqPY6tlZCD254Cc5XNLO-ddEGtKdqj4,1788 +openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py,sha256=eD-tvfFD7arq4w7dzQJFkmHrvLguVrDjpAJRNH6EwIE,1457 +openai/types/beta/threads/runs/file_search_tool_call.py,sha256=PPxrJP3r4RWFTeE5mU-9SbFz37JmKHOGfsxlZGydyW0,522 +openai/types/beta/threads/runs/file_search_tool_call_delta.py,sha256=Gx8c7GSgGYuOvGadcAr3ZIspEFMZS3e2OY7vBo_MYnM,655 +openai/types/beta/threads/runs/function_tool_call.py,sha256=aOq5yOtKOi6C5Q1FIQRxqtJJR1AcSW_K5PvRiKISNCI,920 +openai/types/beta/threads/runs/function_tool_call_delta.py,sha256=VFRtCJkj4PHX97upM1cXpJAk9-JvJSgyngie06fBIjQ,1076 +openai/types/beta/threads/runs/message_creation_step_details.py,sha256=tRFMNF2Rf4DekVliUKkoujItiOjjAE9EG9bbxJvpVPA,506 +openai/types/beta/threads/runs/run_step.py,sha256=UvPakztDIofP8K80Q1gfQSXF18xxp2w9KWRwrcHhjnE,3440 +openai/types/beta/threads/runs/run_step_delta.py,sha256=lNPH43tdQMHHEiaxaS0FtLXsqtH5xOJpYJlAroj7PHg,635 +openai/types/beta/threads/runs/run_step_delta_event.py,sha256=rkDyvHSXt-hc1LngB41f9vglkn6t03kS62bsn0iGaxU,585 +openai/types/beta/threads/runs/run_step_delta_message_delta.py,sha256=UIo6oPH8STLjPHiWL-A4CtKfYe49uptvIAHWNnZ3Ums,564 +openai/types/beta/threads/runs/step_list_params.py,sha256=2vMPFMElvK135ncP9ch6kUnzPGOSIPT3Eio18jJhAqk,1250 +openai/types/beta/threads/runs/tool_call.py,sha256=zyck1JNKBPCIGCMrJN6P850D10Y36FO6LwrX2WM_YR8,515 +openai/types/beta/threads/runs/tool_call_delta.py,sha256=OZeU5fF-77_oG87xNVn_wZo4SpDfjJ5ND9rIQQYKPoE,578 +openai/types/beta/threads/runs/tool_call_delta_object.py,sha256=eK20VsIswEyT48XbkGu60HUrE7OD3fhpn1fbXrVauM4,615 +openai/types/beta/threads/runs/tool_calls_step_details.py,sha256=bDa-yybVF3a8H6VqhDGmFZMkpn-0gtPQM2jWWsmUvYo,574 +openai/types/beta/threads/text.py,sha256=9gjmDCqoptnxQ8Jhym87pECyd6m1lB3daCxKNzSFp4Y,319 +openai/types/beta/threads/text_content_block.py,sha256=pdGlKYM1IF9PjTvxjxo1oDg1XeGCFdJdl0kJVpZ7jIs,319 +openai/types/beta/threads/text_delta.py,sha256=2EFeQCkg_cc8nYEJ6BtYAA3_TqgMTbmEXoMvLjzaB34,389 +openai/types/beta/threads/text_delta_block.py,sha256=pkHkVBgNsmHi9JURzs5ayPqxQXSkex3F0jH0MqJXik0,448 +openai/types/beta/vector_store.py,sha256=zaSaSUpStD3iuyas9f7VQCNF1byxnXRz_5q36eizNGE,2353 +openai/types/beta/vector_store_create_params.py,sha256=tddpQ1KDswqOqzy-ijmSEN5_A-QL8-RbjD8uUf1w5XY,1321 +openai/types/beta/vector_store_deleted.py,sha256=Yq0E1orRLShseLwZ1deiBdDEUgEw_tcYVxGYa5gbIrM,308 +openai/types/beta/vector_store_list_params.py,sha256=8iUgSgs_TeehprKjtTLWOGeH_R8LbDdLkdwMq9xVpSA,1224 +openai/types/beta/vector_store_update_params.py,sha256=AHlOV4f36UWAH4k7XKlGa51Mfao2f7339qI3fskWbIk,1114 +openai/types/beta/vector_stores/__init__.py,sha256=gXfm8V5Ad0iueaC_VoHDUQvSdwSfBzk2cQNwZldvY0s,671 +openai/types/beta/vector_stores/file_batch_create_params.py,sha256=XohBafDsiYkBXDWE32W62UGKBL4jRyyItjIZBIzqQmo,519 +openai/types/beta/vector_stores/file_batch_list_files_params.py,sha256=6c_KvnlFV0vkFid_thhyEK6HC6F1ixbDh2roExL_-qk,1449 +openai/types/beta/vector_stores/file_create_params.py,sha256=0LOEMzQYWoGW6HFrDNhXu1YF_rPwDv28C0yPA5WXyoU,469 +openai/types/beta/vector_stores/file_list_params.py,sha256=UC6NzZQ79tInL8xV3pMm66IFWsIT9PW_BhSbQLm4ar4,1383 +openai/types/beta/vector_stores/vector_store_file.py,sha256=_08rc2lNwXI8keTI6DBGa55DJ12JvxlS2qHwE6iqptQ,1645 +openai/types/beta/vector_stores/vector_store_file_batch.py,sha256=ubvj8z95EOdRGAp0rgI94g5uFQx0ob8hLgwOWHKda4E,1457 
+openai/types/beta/vector_stores/vector_store_file_deleted.py,sha256=37J7oL2WYCgOd7Rhg2jX6IavaZT63vgUf3u6LC6C3Hs,322 +openai/types/chat/__init__.py,sha256=i9KSiVYkzHjyIw2UijlmMiZS6Oky5JxaW5teonqW-T8,2583 +openai/types/chat/chat_completion.py,sha256=DKbYEGcHPzNZyr8tSSbH1CsZJmRmS48SQOvolMByRx4,2321 +openai/types/chat/chat_completion_assistant_message_param.py,sha256=D2wua_9eZnKZEKu-0OC3o5w6nThu7f4HndthRVN6VsQ,1638 +openai/types/chat/chat_completion_chunk.py,sha256=xrdbDvMOQ60a6MNJyWlMGsxyQZ7fi4aabxXPT0xwYeM,4575 +openai/types/chat/chat_completion_content_part_image_param.py,sha256=ODHcWpe8TIXZQHXHhEEacrRHm_TCaFWZnml-bD85XiU,797 +openai/types/chat/chat_completion_content_part_param.py,sha256=XGzw9ocldPg6Ke3ykNRuoxfORAAPtWXe4_SP1iURTDc,486 +openai/types/chat/chat_completion_content_part_text_param.py,sha256=4IpiXMKM9AuTyop5PRptPBbBhh9s93xy2vjg4Yw6NIw,429 +openai/types/chat/chat_completion_function_call_option_param.py,sha256=M-IqWHyBLkvYBcwFxxp4ydCIxbPDaMlNl4bik9UoFd4,365 +openai/types/chat/chat_completion_function_message_param.py,sha256=jIaZbBHHbt4v4xHCIyvYtYLst_X4jOznRjYNcTf0MF0,591 +openai/types/chat/chat_completion_message.py,sha256=19e2EL6cHZA6EeOVPgI_LbN3UwNLKizhtxuXnxLzhX0,1282 +openai/types/chat/chat_completion_message_param.py,sha256=RGdT7OjJPQTd2M0drDVNxBkUB-9DHMkQjNolaOY9nw0,838 +openai/types/chat/chat_completion_message_tool_call.py,sha256=XlIe2vhSYvrt8o8Yol5AQqnacI1xHqpEIV26G4oNrZY,900 +openai/types/chat/chat_completion_message_tool_call_param.py,sha256=XNhuUpGr5qwVTo0K8YavJwleHYSdwN_urK51eKlqC24,1009 +openai/types/chat/chat_completion_named_tool_choice_param.py,sha256=JsxfSJYpOmF7zIreQ0JrXRSLp07OGCBSycRRcF6OZmg,569 +openai/types/chat/chat_completion_role.py,sha256=F5BlM6FMrJmqtCx3-W-KjhXXrVYAWv87_alwF7fOTSM,240 +openai/types/chat/chat_completion_stream_options_param.py,sha256=7-R2mYh7dbtX9qDOL3UkeyVH6FNWC_4aTCLtHYObMbs,628 +openai/types/chat/chat_completion_system_message_param.py,sha256=qWEJupmzMuUa82V7OoLeQF92SKE1QoU4cXfX2o43x9E,638 +openai/types/chat/chat_completion_token_logprob.py,sha256=6-ipUFfsXMf5L7FDFi127NaVkDtmEooVgGBF6Ts965A,1769 +openai/types/chat/chat_completion_tool_choice_option_param.py,sha256=cGMIgf6e5KG1xbP1_dg-S_ktD78ECkDAPFekFBHH0PU,444 +openai/types/chat/chat_completion_tool_message_param.py,sha256=B-PST-J1VwPjaKLpzpmqfEsHlr5Owb54dnQoIhbvuY4,553 +openai/types/chat/chat_completion_tool_param.py,sha256=sve2G1DayUs-1CMzXK1x104r8KTa5K62CZdxoyLmFlk,485 +openai/types/chat/chat_completion_user_message_param.py,sha256=mik-MRkwb543C5FSJ52LtTkeA2E_HdLUgtoHEdO73XQ,792 +openai/types/chat/completion_create_params.py,sha256=8GKvdVT9St2_c1ZX6jWIeFMA_b8mCNRraEWFIhICX8k,10240 +openai/types/chat_model.py,sha256=svOsfQBsRI9d4Fb9e2TUBi5e5VE2CHCN5bFMVABhKBY,620 +openai/types/completion.py,sha256=yuYVEVkJcMVUINNLglkxOJqCx097HKCYFeJun3Js73A,1172 +openai/types/completion_choice.py,sha256=PUk77T3Cp34UJSXoMfSzTKGWDK0rQQwq84X_PSlOUJo,965 +openai/types/completion_create_params.py,sha256=mEyR068kk36ZylY4d1K3sqnucpUz9fAqEyoEwmW3DtQ,7567 +openai/types/completion_usage.py,sha256=MIa0LipVCM88I9h71aXF_aVkNVt47iTa74gDtWvDDTA,436 +openai/types/create_embedding_response.py,sha256=lTAu_Pym76kFljDnnDRoDB2GNQSzWmwwlqf5ff7FNPM,798 +openai/types/embedding.py,sha256=2pV6RTSf5UV6E86Xeud5ZwmjQjMS93m_4LrQ0GN3fho,637 +openai/types/embedding_create_params.py,sha256=3p7U8i2uG1SCpELbn_IeDMLkFe-vv7cyB5dx-_4U8iU,1885 +openai/types/file_content.py,sha256=E2CsQejO19KSjdShjg5nsCtS4BbBwhPVDSfFEUA8ZNM,133 +openai/types/file_create_params.py,sha256=gpZJLxy2Q7zPrfYY_fFEF19P5BDldzHx7v0sCPLgCMw,873 
+openai/types/file_deleted.py,sha256=H_r9U7XthT5xHAo_4ay1EGGkc21eURt8MkkIBRYiQcw,277 +openai/types/file_list_params.py,sha256=VhZbSrCO0fYnUTgPE_nuBy-3A5MjpXiBtI-BahAc5SY,310 +openai/types/file_object.py,sha256=9AHXLSU2ntSagFzh96i0qDYxeQOzDeMkIUPU9hmeEFI,1226 +openai/types/fine_tuning/__init__.py,sha256=SZvjq_22oY9E4zcnrvVd0ul9U4sk_IBeOd0MsNALu5s,806 +openai/types/fine_tuning/fine_tuning_job.py,sha256=YOcsIJZPPAqOnQudOkS_Am-peQuHyyvcMWVDxFvJdEA,3861 +openai/types/fine_tuning/fine_tuning_job_event.py,sha256=oCkO0yImLZnZQLeU4GH6YyUlDG25pzs41SCWWB-sd_o,374 +openai/types/fine_tuning/fine_tuning_job_integration.py,sha256=YZI3gQSE9zhfAcghYryzoug_IPfdog_fsjf2eCIMzD8,243 +openai/types/fine_tuning/fine_tuning_job_wandb_integration.py,sha256=YnBeiz14UuhUSpnD0KBj5V143qLvJbDIMcUVWOCBLXY,1026 +openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py,sha256=7vEc2uEV2c_DENBjhq0Qy5X8B-rzxsKvGECjnvF1Wdw,804 +openai/types/fine_tuning/job_create_params.py,sha256=3wbT1U-LnSHyTa2AnRaCHeQhA_I4GP0oyoBx-2D54N4,4407 +openai/types/fine_tuning/job_list_events_params.py,sha256=4xOED4H2ky2mI9sIDytjmfJz5bNAdNWb70WIb_0bBWs,400 +openai/types/fine_tuning/job_list_params.py,sha256=yjxaEnESVTRpJ9ItvjKq30KcD_xz_trqKMIxG2eAriE,396 +openai/types/fine_tuning/jobs/__init__.py,sha256=nuWhOUsmsoVKTKMU35kknmr8sfpTF-kkIzyuOlRbJj0,295 +openai/types/fine_tuning/jobs/checkpoint_list_params.py,sha256=XoDLkkKCWmf5an5rnoVEpNK8mtQHq1fHw9EqmezfrXM,415 +openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py,sha256=Z_sUhebJY9nWSssZU7QoOJwe5sez76sCAuVeSO63XhY,1347 +openai/types/image.py,sha256=9No-8GHesOUbjchemY1jqtMwh_s22oBmLVFlLn2KoQo,607 +openai/types/image_create_variation_params.py,sha256=3f0qYfKrSuYA2gv7lyCq0FsRM36QctZ_Ki2YPLeNNj4,1450 +openai/types/image_edit_params.py,sha256=oQIiKqlU_59H1f0HtBlQw_BJ7mBEXRispfoGuDnfXHI,1810 +openai/types/image_generate_params.py,sha256=YztuD1oHepGqmP-m78Uhay67IgwGk7CspdAn2YWihlw,2116 +openai/types/images_response.py,sha256=EJ4qxYZ8CPGh2SZdRsyw6I0FnUvlgwxwc4NgPovJrvk,274 +openai/types/model.py,sha256=DMw8KwQx8B6S6sAI038D0xdzkmYdY5-r0oMhCUG4l6w,532 +openai/types/model_deleted.py,sha256=rDGU-Ul4lMfNf5XxKNxZKo9CQPGsrkrzqnhl00GLMi4,230 +openai/types/moderation.py,sha256=ihR2jzld_BfOaHW1_6A2csTInEaJvAl5nPxuh_jegY4,3933 +openai/types/moderation_create_params.py,sha256=Rz8kzoisqPihOLdPjrSchM0uml5VPHV8DqcrE56rwUs,954 +openai/types/moderation_create_response.py,sha256=e6SVfWX2_JX25Za0C6KojcnbMTtDB2A7cjUm6cFMKcs,484 +openai/types/shared/__init__.py,sha256=eoiCHGKeY1_YjOn41M8QxvIUI_M68Ltsr1d67g_Pr-I,288 +openai/types/shared/error_object.py,sha256=G7SGPZ9Qw3gewTKbi3fK69eM6L2Ur0C2D57N8iEapJA,305 +openai/types/shared/function_definition.py,sha256=n505SpWCIf_ntWZZ8liz0rcLhLxUsdnULsM5IA0fBUk,1067 +openai/types/shared/function_parameters.py,sha256=jhabBaJFMgWfFduqmKQ0dkKfK5DWlwgde30SlZVcCYc,185 +openai/types/shared_params/__init__.py,sha256=Jaw3mmmUB3Ky7vL1fzsh-8kAJEbeYxcQ0JOy7p765Xo,235 +openai/types/shared_params/function_definition.py,sha256=zq61IKY91bRJ346qkrS3_5w3R-xKgEEIdkXdN-Zj9Uc,1078 +openai/types/shared_params/function_parameters.py,sha256=vqZAZwPBh14Ykp84NFTXF_j0eoDyqF9V_d8-_n-KF9w,221 +openai/version.py,sha256=cjbXKO8Ut3aiv4YlQnugff7AdC48MpSndcx96q88Yb8,62 diff --git a/portkey_ai/_vendor/openai-1.26.0.dist-info/REQUESTED b/portkey_ai/_vendor/openai-1.26.0.dist-info/REQUESTED new file mode 100644 index 00000000..e69de29b diff --git a/portkey_ai/_vendor/openai-1.26.0.dist-info/WHEEL b/portkey_ai/_vendor/openai-1.26.0.dist-info/WHEEL new file mode 100644 index 00000000..516596c7 --- /dev/null +++ 
b/portkey_ai/_vendor/openai-1.26.0.dist-info/WHEEL @@ -0,0 +1,4 @@ +Wheel-Version: 1.0 +Generator: hatchling 1.24.2 +Root-Is-Purelib: true +Tag: py3-none-any diff --git a/portkey_ai/_vendor/openai-1.26.0.dist-info/entry_points.txt b/portkey_ai/_vendor/openai-1.26.0.dist-info/entry_points.txt new file mode 100644 index 00000000..98999396 --- /dev/null +++ b/portkey_ai/_vendor/openai-1.26.0.dist-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +openai = openai.cli:main diff --git a/portkey_ai/_vendor/openai-1.26.0.dist-info/licenses/LICENSE b/portkey_ai/_vendor/openai-1.26.0.dist-info/licenses/LICENSE new file mode 100644 index 00000000..621a6bec --- /dev/null +++ b/portkey_ai/_vendor/openai-1.26.0.dist-info/licenses/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2024 OpenAI + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/portkey_ai/_vendor/openai/__init__.py b/portkey_ai/_vendor/openai/__init__.py new file mode 100644 index 00000000..c49cb409 --- /dev/null +++ b/portkey_ai/_vendor/openai/__init__.py @@ -0,0 +1,370 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os as _os +from typing_extensions import override + +from . import types +from ._types import NOT_GIVEN, NoneType, NotGiven, Transport, ProxiesTypes +from ._utils import file_from_path +from ._client import ( + Client, + OpenAI, + Stream, + Timeout, + Transport, + AsyncClient, + AsyncOpenAI, + AsyncStream, + RequestOptions, +) +from ._models import BaseModel +from ._version import __title__, __version__ +from ._response import APIResponse as APIResponse, AsyncAPIResponse as AsyncAPIResponse +from ._constants import DEFAULT_TIMEOUT, DEFAULT_MAX_RETRIES, DEFAULT_CONNECTION_LIMITS +from ._exceptions import ( + APIError, + OpenAIError, + ConflictError, + NotFoundError, + APIStatusError, + RateLimitError, + APITimeoutError, + BadRequestError, + APIConnectionError, + AuthenticationError, + InternalServerError, + PermissionDeniedError, + UnprocessableEntityError, + APIResponseValidationError, +) +from ._base_client import DefaultHttpxClient, DefaultAsyncHttpxClient +from ._utils._logs import setup_logging as _setup_logging + +__all__ = [ + "types", + "__version__", + "__title__", + "NoneType", + "Transport", + "ProxiesTypes", + "NotGiven", + "NOT_GIVEN", + "OpenAIError", + "APIError", + "APIStatusError", + "APITimeoutError", + "APIConnectionError", + "APIResponseValidationError", + "BadRequestError", + "AuthenticationError", + "PermissionDeniedError", + "NotFoundError", + "ConflictError", + "UnprocessableEntityError", + "RateLimitError", + "InternalServerError", + "Timeout", + "RequestOptions", + "Client", + "AsyncClient", + "Stream", + "AsyncStream", + "OpenAI", + "AsyncOpenAI", + "file_from_path", + "BaseModel", + "DEFAULT_TIMEOUT", + "DEFAULT_MAX_RETRIES", + "DEFAULT_CONNECTION_LIMITS", + "DefaultHttpxClient", + "DefaultAsyncHttpxClient", +] + +from .lib import azure as _azure +from .version import VERSION as VERSION +from .lib.azure import AzureOpenAI as AzureOpenAI, AsyncAzureOpenAI as AsyncAzureOpenAI +from .lib._old_api import * +from .lib.streaming import ( + AssistantEventHandler as AssistantEventHandler, + 
AsyncAssistantEventHandler as AsyncAssistantEventHandler, +) + +_setup_logging() + +# Update the __module__ attribute for exported symbols so that +# error messages point to this module instead of the module +# it was originally defined in, e.g. +# openai._exceptions.NotFoundError -> openai.NotFoundError +__locals = locals() +for __name in __all__: + if not __name.startswith("__"): + try: + __locals[__name].__module__ = "openai" + except (TypeError, AttributeError): + # Some of our exported symbols are builtins which we can't set attributes for. + pass + +# ------ Module level client ------ +import typing as _t +import typing_extensions as _te + +import httpx as _httpx + +from ._base_client import DEFAULT_TIMEOUT, DEFAULT_MAX_RETRIES + +api_key: str | None = None + +organization: str | None = None + +project: str | None = None + +base_url: str | _httpx.URL | None = None + +timeout: float | Timeout | None = DEFAULT_TIMEOUT + +max_retries: int = DEFAULT_MAX_RETRIES + +default_headers: _t.Mapping[str, str] | None = None + +default_query: _t.Mapping[str, object] | None = None + +http_client: _httpx.Client | None = None + +_ApiType = _te.Literal["openai", "azure"] + +api_type: _ApiType | None = _t.cast(_ApiType, _os.environ.get("OPENAI_API_TYPE")) + +api_version: str | None = _os.environ.get("OPENAI_API_VERSION") + +azure_endpoint: str | None = _os.environ.get("AZURE_OPENAI_ENDPOINT") + +azure_ad_token: str | None = _os.environ.get("AZURE_OPENAI_AD_TOKEN") + +azure_ad_token_provider: _azure.AzureADTokenProvider | None = None + + +class _ModuleClient(OpenAI): + # Note: we have to use type: ignores here as overriding class members + # with properties is technically unsafe but it is fine for our use case + + @property # type: ignore + @override + def api_key(self) -> str | None: + return api_key + + @api_key.setter # type: ignore + def api_key(self, value: str | None) -> None: # type: ignore + global api_key + + api_key = value + + @property # type: ignore + @override + def organization(self) -> str | None: + return organization + + @organization.setter # type: ignore + def organization(self, value: str | None) -> None: # type: ignore + global organization + + organization = value + + @property # type: ignore + @override + def project(self) -> str | None: + return project + + @project.setter # type: ignore + def project(self, value: str | None) -> None: # type: ignore + global project + + project = value + + @property + @override + def base_url(self) -> _httpx.URL: + if base_url is not None: + return _httpx.URL(base_url) + + return super().base_url + + @base_url.setter + def base_url(self, url: _httpx.URL | str) -> None: + super().base_url = url # type: ignore[misc] + + @property # type: ignore + @override + def timeout(self) -> float | Timeout | None: + return timeout + + @timeout.setter # type: ignore + def timeout(self, value: float | Timeout | None) -> None: # type: ignore + global timeout + + timeout = value + + @property # type: ignore + @override + def max_retries(self) -> int: + return max_retries + + @max_retries.setter # type: ignore + def max_retries(self, value: int) -> None: # type: ignore + global max_retries + + max_retries = value + + @property # type: ignore + @override + def _custom_headers(self) -> _t.Mapping[str, str] | None: + return default_headers + + @_custom_headers.setter # type: ignore + def _custom_headers(self, value: _t.Mapping[str, str] | None) -> None: # type: ignore + global default_headers + + default_headers = value + + @property # type: ignore + @override + def 
_custom_query(self) -> _t.Mapping[str, object] | None: + return default_query + + @_custom_query.setter # type: ignore + def _custom_query(self, value: _t.Mapping[str, object] | None) -> None: # type: ignore + global default_query + + default_query = value + + @property # type: ignore + @override + def _client(self) -> _httpx.Client: + return http_client or super()._client + + @_client.setter # type: ignore + def _client(self, value: _httpx.Client) -> None: # type: ignore + global http_client + + http_client = value + + +class _AzureModuleClient(_ModuleClient, AzureOpenAI): # type: ignore + ... + + +class _AmbiguousModuleClientUsageError(OpenAIError): + def __init__(self) -> None: + super().__init__( + "Ambiguous use of module client; please set `openai.api_type` or the `OPENAI_API_TYPE` environment variable to `openai` or `azure`" + ) + + +def _has_openai_credentials() -> bool: + return _os.environ.get("OPENAI_API_KEY") is not None + + +def _has_azure_credentials() -> bool: + return ( + azure_endpoint is not None + or _os.environ.get("AZURE_OPENAI_API_KEY") is not None + ) + + +def _has_azure_ad_credentials() -> bool: + return ( + _os.environ.get("AZURE_OPENAI_AD_TOKEN") is not None + or azure_ad_token is not None + or azure_ad_token_provider is not None + ) + + +_client: OpenAI | None = None + + +def _load_client() -> OpenAI: # type: ignore[reportUnusedFunction] + global _client + + if _client is None: + global api_type, azure_endpoint, azure_ad_token, api_version + + if azure_endpoint is None: + azure_endpoint = _os.environ.get("AZURE_OPENAI_ENDPOINT") + + if azure_ad_token is None: + azure_ad_token = _os.environ.get("AZURE_OPENAI_AD_TOKEN") + + if api_version is None: + api_version = _os.environ.get("OPENAI_API_VERSION") + + if api_type is None: + has_openai = _has_openai_credentials() + has_azure = _has_azure_credentials() + has_azure_ad = _has_azure_ad_credentials() + + if has_openai and (has_azure or has_azure_ad): + raise _AmbiguousModuleClientUsageError() + + if ( + azure_ad_token is not None or azure_ad_token_provider is not None + ) and _os.environ.get("AZURE_OPENAI_API_KEY") is not None: + raise _AmbiguousModuleClientUsageError() + + if has_azure or has_azure_ad: + api_type = "azure" + else: + api_type = "openai" + + if api_type == "azure": + _client = _AzureModuleClient( # type: ignore + api_version=api_version, + azure_endpoint=azure_endpoint, + api_key=api_key, + azure_ad_token=azure_ad_token, + azure_ad_token_provider=azure_ad_token_provider, + organization=organization, + base_url=base_url, + timeout=timeout, + max_retries=max_retries, + default_headers=default_headers, + default_query=default_query, + http_client=http_client, + ) + return _client + + _client = _ModuleClient( + api_key=api_key, + organization=organization, + project=project, + base_url=base_url, + timeout=timeout, + max_retries=max_retries, + default_headers=default_headers, + default_query=default_query, + http_client=http_client, + ) + return _client + + return _client + + +def _reset_client() -> None: # type: ignore[reportUnusedFunction] + global _client + + _client = None + + +from ._module_client import ( + beta as beta, + chat as chat, + audio as audio, + files as files, + images as images, + models as models, + batches as batches, + embeddings as embeddings, + completions as completions, + fine_tuning as fine_tuning, + moderations as moderations, +) diff --git a/portkey_ai/_vendor/openai/__main__.py b/portkey_ai/_vendor/openai/__main__.py new file mode 100644 index 00000000..4e28416e --- /dev/null 
+++ b/portkey_ai/_vendor/openai/__main__.py @@ -0,0 +1,3 @@ +from .cli import main + +main() diff --git a/portkey_ai/_vendor/openai/_base_client.py b/portkey_ai/_vendor/openai/_base_client.py new file mode 100644 index 00000000..7a51c42d --- /dev/null +++ b/portkey_ai/_vendor/openai/_base_client.py @@ -0,0 +1,2107 @@ +from __future__ import annotations + +import json +import time +import uuid +import email +import asyncio +import inspect +import logging +import platform +import warnings +import email.utils +from types import TracebackType +from random import random +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Type, + Union, + Generic, + Mapping, + TypeVar, + Iterable, + Iterator, + Optional, + Generator, + AsyncIterator, + cast, + overload, +) +from typing_extensions import Literal, override, get_origin + +import anyio +import httpx +import distro +import pydantic +from httpx import URL, Limits +from pydantic import PrivateAttr + +from . import _exceptions +from ._qs import Querystring +from ._files import to_httpx_files, async_to_httpx_files +from ._types import ( + NOT_GIVEN, + Body, + Omit, + Query, + Headers, + Timeout, + NotGiven, + ResponseT, + Transport, + AnyMapping, + PostParser, + ProxiesTypes, + RequestFiles, + HttpxSendArgs, + AsyncTransport, + RequestOptions, + ModelBuilderProtocol, +) +from ._utils import is_dict, is_list, is_given, lru_cache, is_mapping +from ._compat import model_copy, model_dump +from ._models import GenericModel, FinalRequestOptions, validate_type, construct_type +from ._response import ( + APIResponse, + BaseAPIResponse, + AsyncAPIResponse, + extract_response_type, +) +from ._constants import ( + DEFAULT_TIMEOUT, + MAX_RETRY_DELAY, + DEFAULT_MAX_RETRIES, + INITIAL_RETRY_DELAY, + RAW_RESPONSE_HEADER, + OVERRIDE_CAST_TO_HEADER, + DEFAULT_CONNECTION_LIMITS, +) +from ._streaming import Stream, SSEDecoder, AsyncStream, SSEBytesDecoder +from ._exceptions import ( + APIStatusError, + APITimeoutError, + APIConnectionError, + APIResponseValidationError, +) +from ._legacy_response import LegacyAPIResponse + +log: logging.Logger = logging.getLogger(__name__) + +# TODO: make base page type vars covariant +SyncPageT = TypeVar("SyncPageT", bound="BaseSyncPage[Any]") +AsyncPageT = TypeVar("AsyncPageT", bound="BaseAsyncPage[Any]") + + +_T = TypeVar("_T") +_T_co = TypeVar("_T_co", covariant=True) + +_StreamT = TypeVar("_StreamT", bound=Stream[Any]) +_AsyncStreamT = TypeVar("_AsyncStreamT", bound=AsyncStream[Any]) + +if TYPE_CHECKING: + from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT +else: + try: + from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT + except ImportError: + # taken from https://github.com/encode/httpx/blob/3ba5fe0d7ac70222590e759c31442b1cab263791/httpx/_config.py#L366 + HTTPX_DEFAULT_TIMEOUT = Timeout(5.0) + + +class PageInfo: + """Stores the necessary information to build the request to retrieve the next page. + + Either `url` or `params` must be set. + """ + + url: URL | NotGiven + params: Query | NotGiven + + @overload + def __init__( + self, + *, + url: URL, + ) -> None: + ... + + @overload + def __init__( + self, + *, + params: Query, + ) -> None: + ... + + def __init__( + self, + *, + url: URL | NotGiven = NOT_GIVEN, + params: Query | NotGiven = NOT_GIVEN, + ) -> None: + self.url = url + self.params = params + + +class BasePage(GenericModel, Generic[_T]): + """ + Defines the core interface for pagination. + + Type Args: + ModelT: The pydantic model that represents an item in the response. 
+ + Methods: + has_next_page(): Check if there is another page available + next_page_info(): Get the necessary information to make a request for the next page + """ + + _options: FinalRequestOptions = PrivateAttr() + _model: Type[_T] = PrivateAttr() + + def has_next_page(self) -> bool: + items = self._get_page_items() + if not items: + return False + return self.next_page_info() is not None + + def next_page_info(self) -> Optional[PageInfo]: + ... + + def _get_page_items(self) -> Iterable[_T]: # type: ignore[empty-body] + ... + + def _params_from_url(self, url: URL) -> httpx.QueryParams: + # TODO: do we have to preprocess params here? + return httpx.QueryParams(cast(Any, self._options.params)).merge(url.params) + + def _info_to_options(self, info: PageInfo) -> FinalRequestOptions: + options = model_copy(self._options) + options._strip_raw_response_header() + + if not isinstance(info.params, NotGiven): + options.params = {**options.params, **info.params} + return options + + if not isinstance(info.url, NotGiven): + params = self._params_from_url(info.url) + url = info.url.copy_with(params=params) + options.params = dict(url.params) + options.url = str(url) + return options + + raise ValueError("Unexpected PageInfo state") + + +class BaseSyncPage(BasePage[_T], Generic[_T]): + _client: SyncAPIClient = pydantic.PrivateAttr() + + def _set_private_attributes( + self, + client: SyncAPIClient, + model: Type[_T], + options: FinalRequestOptions, + ) -> None: + self._model = model + self._client = client + self._options = options + + # Pydantic uses a custom `__iter__` method to support casting BaseModels + # to dictionaries. e.g. dict(model). + # As we want to support `for item in page`, this is inherently incompatible + # with the default pydantic behaviour. It is not possible to support both + # use cases at once. Fortunately, this is not a big deal as all other pydantic + # methods should continue to work as expected as there is an alternative method + # to cast a model to a dictionary, model.dict(), which is used internally + # by pydantic. + def __iter__(self) -> Iterator[_T]: # type: ignore + for page in self.iter_pages(): + for item in page._get_page_items(): + yield item + + def iter_pages(self: SyncPageT) -> Iterator[SyncPageT]: + page = self + while True: + yield page + if page.has_next_page(): + page = page.get_next_page() + else: + return + + def get_next_page(self: SyncPageT) -> SyncPageT: + info = self.next_page_info() + if not info: + raise RuntimeError( + "No next page expected; please check `.has_next_page()` before calling `.get_next_page()`." 
+ ) + + options = self._info_to_options(info) + return self._client._request_api_list( + self._model, page=self.__class__, options=options + ) + + +class AsyncPaginator(Generic[_T, AsyncPageT]): + def __init__( + self, + client: AsyncAPIClient, + options: FinalRequestOptions, + page_cls: Type[AsyncPageT], + model: Type[_T], + ) -> None: + self._model = model + self._client = client + self._options = options + self._page_cls = page_cls + + def __await__(self) -> Generator[Any, None, AsyncPageT]: + return self._get_page().__await__() + + async def _get_page(self) -> AsyncPageT: + def _parser(resp: AsyncPageT) -> AsyncPageT: + resp._set_private_attributes( + model=self._model, + options=self._options, + client=self._client, + ) + return resp + + self._options.post_parser = _parser + + return await self._client.request(self._page_cls, self._options) + + async def __aiter__(self) -> AsyncIterator[_T]: + # https://github.com/microsoft/pyright/issues/3464 + page = cast( + AsyncPageT, + await self, # type: ignore + ) + async for item in page: + yield item + + +class BaseAsyncPage(BasePage[_T], Generic[_T]): + _client: AsyncAPIClient = pydantic.PrivateAttr() + + def _set_private_attributes( + self, + model: Type[_T], + client: AsyncAPIClient, + options: FinalRequestOptions, + ) -> None: + self._model = model + self._client = client + self._options = options + + async def __aiter__(self) -> AsyncIterator[_T]: + async for page in self.iter_pages(): + for item in page._get_page_items(): + yield item + + async def iter_pages(self: AsyncPageT) -> AsyncIterator[AsyncPageT]: + page = self + while True: + yield page + if page.has_next_page(): + page = await page.get_next_page() + else: + return + + async def get_next_page(self: AsyncPageT) -> AsyncPageT: + info = self.next_page_info() + if not info: + raise RuntimeError( + "No next page expected; please check `.has_next_page()` before calling `.get_next_page()`." 
+ ) + + options = self._info_to_options(info) + return await self._client._request_api_list( + self._model, page=self.__class__, options=options + ) + + +_HttpxClientT = TypeVar("_HttpxClientT", bound=Union[httpx.Client, httpx.AsyncClient]) +_DefaultStreamT = TypeVar("_DefaultStreamT", bound=Union[Stream[Any], AsyncStream[Any]]) + + +class BaseClient(Generic[_HttpxClientT, _DefaultStreamT]): + _client: _HttpxClientT + _version: str + _base_url: URL + max_retries: int + timeout: Union[float, Timeout, None] + _limits: httpx.Limits + _proxies: ProxiesTypes | None + _transport: Transport | AsyncTransport | None + _strict_response_validation: bool + _idempotency_header: str | None + _default_stream_cls: type[_DefaultStreamT] | None = None + + def __init__( + self, + *, + version: str, + base_url: str | URL, + _strict_response_validation: bool, + max_retries: int = DEFAULT_MAX_RETRIES, + timeout: float | Timeout | None = DEFAULT_TIMEOUT, + limits: httpx.Limits, + transport: Transport | AsyncTransport | None, + proxies: ProxiesTypes | None, + custom_headers: Mapping[str, str] | None = None, + custom_query: Mapping[str, object] | None = None, + ) -> None: + self._version = version + self._base_url = self._enforce_trailing_slash(URL(base_url)) + self.max_retries = max_retries + self.timeout = timeout + self._limits = limits + self._proxies = proxies + self._transport = transport + self._custom_headers = custom_headers or {} + self._custom_query = custom_query or {} + self._strict_response_validation = _strict_response_validation + self._idempotency_header = None + + if max_retries is None: # pyright: ignore[reportUnnecessaryComparison] + raise TypeError( + "max_retries cannot be None. If you want to disable retries, pass `0`; if you want unlimited retries, pass `math.inf` or a very high number; if you want the default behavior, pass `openai.DEFAULT_MAX_RETRIES`" + ) + + def _enforce_trailing_slash(self, url: URL) -> URL: + if url.raw_path.endswith(b"/"): + return url + return url.copy_with(raw_path=url.raw_path + b"/") + + def _make_status_error_from_response( + self, + response: httpx.Response, + ) -> APIStatusError: + if response.is_closed and not response.is_stream_consumed: + # We can't read the response body as it has been closed + # before it was read. This can happen if an event hook + # raises a status error. + body = None + err_msg = f"Error code: {response.status_code}" + else: + err_text = response.text.strip() + body = err_text + + try: + body = json.loads(err_text) + err_msg = f"Error code: {response.status_code} - {body}" + except Exception: + err_msg = err_text or f"Error code: {response.status_code}" + + return self._make_status_error(err_msg, body=body, response=response) + + def _make_status_error( + self, + err_msg: str, + *, + body: object, + response: httpx.Response, + ) -> _exceptions.APIStatusError: + raise NotImplementedError() + + def _remaining_retries( + self, + remaining_retries: Optional[int], + options: FinalRequestOptions, + ) -> int: + return ( + remaining_retries + if remaining_retries is not None + else options.get_max_retries(self.max_retries) + ) + + def _build_headers(self, options: FinalRequestOptions) -> httpx.Headers: + custom_headers = options.headers or {} + headers_dict = _merge_mappings(self.default_headers, custom_headers) + self._validate_headers(headers_dict, custom_headers) + + # headers are case-insensitive while dictionaries are not. 
+ headers = httpx.Headers(headers_dict) + + idempotency_header = self._idempotency_header + if ( + idempotency_header + and options.method.lower() != "get" + and idempotency_header not in headers + ): + headers[idempotency_header] = ( + options.idempotency_key or self._idempotency_key() + ) + + return headers + + def _prepare_url(self, url: str) -> URL: + """ + Merge a URL argument together with any 'base_url' on the client, + to create the URL used for the outgoing request. + """ + # Copied from httpx's `_merge_url` method. + merge_url = URL(url) + if merge_url.is_relative_url: + merge_raw_path = self.base_url.raw_path + merge_url.raw_path.lstrip(b"/") + return self.base_url.copy_with(raw_path=merge_raw_path) + + return merge_url + + def _make_sse_decoder(self) -> SSEDecoder | SSEBytesDecoder: + return SSEDecoder() + + def _build_request( + self, + options: FinalRequestOptions, + ) -> httpx.Request: + if log.isEnabledFor(logging.DEBUG): + log.debug("Request options: %s", model_dump(options, exclude_unset=True)) + + kwargs: dict[str, Any] = {} + + json_data = options.json_data + if options.extra_json is not None: + if json_data is None: + json_data = cast(Body, options.extra_json) + elif is_mapping(json_data): + json_data = _merge_mappings(json_data, options.extra_json) + else: + raise RuntimeError( + f"Unexpected JSON data type, {type(json_data)}, cannot merge with `extra_body`" + ) + + headers = self._build_headers(options) + params = _merge_mappings(self._custom_query, options.params) + content_type = headers.get("Content-Type") + + # If the given Content-Type header is multipart/form-data then it + # has to be removed so that httpx can generate the header with + # additional information for us as it has to be in this form + # for the server to be able to correctly parse the request: + # multipart/form-data; boundary=---abc-- + if content_type is not None and content_type.startswith("multipart/form-data"): + if "boundary" not in content_type: + # only remove the header if the boundary hasn't been explicitly set + # as the caller doesn't want httpx to come up with their own boundary + headers.pop("Content-Type") + + # As we are now sending multipart/form-data instead of application/json + # we need to tell httpx to use it, https://www.python-httpx.org/advanced/#multipart-file-encoding + if json_data: + if not is_dict(json_data): + raise TypeError( + f"Expected query input to be a dictionary for multipart requests but got {type(json_data)} instead." + ) + kwargs["data"] = self._serialize_multipartform(json_data) + + # TODO: report this error to httpx + return self._client.build_request( # pyright: ignore[reportUnknownMemberType] + headers=headers, + timeout=self.timeout + if isinstance(options.timeout, NotGiven) + else options.timeout, + method=options.method, + url=self._prepare_url(options.url), + # the `Query` type that we use is incompatible with qs' + # `Params` type as it needs to be typed as `Mapping[str, object]` + # so that passing a `TypedDict` doesn't cause an error. + # https://github.com/microsoft/pyright/issues/3526#event-6715453066 + params=self.qs.stringify(cast(Mapping[str, Any], params)) + if params + else None, + json=json_data, + files=options.files, + **kwargs, + ) + + def _serialize_multipartform( + self, data: Mapping[object, object] + ) -> dict[str, object]: + items = self.qs.stringify_items( + # TODO: type ignore is required as stringify_items is well typed but we can't be + # well typed without heavy validation. 
+ data, # type: ignore + array_format="brackets", + ) + serialized: dict[str, object] = {} + for key, value in items: + existing = serialized.get(key) + + if not existing: + serialized[key] = value + continue + + # If a value has already been set for this key then that + # means we're sending data like `array[]=[1, 2, 3]` and we + # need to tell httpx that we want to send multiple values with + # the same key which is done by using a list or a tuple. + # + # Note: 2d arrays should never result in the same key at both + # levels so it's safe to assume that if the value is a list, + # it was because we changed it to be a list. + if is_list(existing): + existing.append(value) + else: + serialized[key] = [existing, value] + + return serialized + + def _maybe_override_cast_to( + self, cast_to: type[ResponseT], options: FinalRequestOptions + ) -> type[ResponseT]: + if not is_given(options.headers): + return cast_to + + # make a copy of the headers so we don't mutate user-input + headers = dict(options.headers) + + # we internally support defining a temporary header to override the + # default `cast_to` type for use with `.with_raw_response` and `.with_streaming_response` + # see _response.py for implementation details + override_cast_to = headers.pop(OVERRIDE_CAST_TO_HEADER, NOT_GIVEN) + if is_given(override_cast_to): + options.headers = headers + return cast(Type[ResponseT], override_cast_to) + + return cast_to + + def _should_stream_response_body(self, request: httpx.Request) -> bool: + return request.headers.get(RAW_RESPONSE_HEADER) == "stream" # type: ignore[no-any-return] + + def _process_response_data( + self, + *, + data: object, + cast_to: type[ResponseT], + response: httpx.Response, + ) -> ResponseT: + if data is None: + return cast(ResponseT, None) + + if cast_to is object: + return cast(ResponseT, data) + + try: + if inspect.isclass(cast_to) and issubclass(cast_to, ModelBuilderProtocol): + return cast(ResponseT, cast_to.build(response=response, data=data)) + + if self._strict_response_validation: + return cast(ResponseT, validate_type(type_=cast_to, value=data)) + + return cast(ResponseT, construct_type(type_=cast_to, value=data)) + except pydantic.ValidationError as err: + raise APIResponseValidationError(response=response, body=data) from err + + @property + def qs(self) -> Querystring: + return Querystring() + + @property + def custom_auth(self) -> httpx.Auth | None: + return None + + @property + def auth_headers(self) -> dict[str, str]: + return {} + + @property + def default_headers(self) -> dict[str, str | Omit]: + return { + "Accept": "application/json", + "Content-Type": "application/json", + "User-Agent": self.user_agent, + **self.platform_headers(), + **self.auth_headers, + **self._custom_headers, + } + + def _validate_headers( + self, + headers: Headers, # noqa: ARG002 + custom_headers: Headers, # noqa: ARG002 + ) -> None: + """Validate the given default headers and custom headers. + + Does nothing by default. 
+ """ + return + + @property + def user_agent(self) -> str: + return f"{self.__class__.__name__}/Python {self._version}" + + @property + def base_url(self) -> URL: + return self._base_url + + @base_url.setter + def base_url(self, url: URL | str) -> None: + self._base_url = self._enforce_trailing_slash( + url if isinstance(url, URL) else URL(url) + ) + + def platform_headers(self) -> Dict[str, str]: + return platform_headers(self._version) + + def _parse_retry_after_header( + self, response_headers: Optional[httpx.Headers] = None + ) -> float | None: + """Returns a float of the number of seconds (not milliseconds) to wait after retrying, or None if unspecified. + + About the Retry-After header: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After + See also https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After#syntax + """ + if response_headers is None: + return None + + # First, try the non-standard `retry-after-ms` header for milliseconds, + # which is more precise than integer-seconds `retry-after` + try: + retry_ms_header = response_headers.get("retry-after-ms", None) + return float(retry_ms_header) / 1000 + except (TypeError, ValueError): + pass + + # Next, try parsing `retry-after` header as seconds (allowing nonstandard floats). + retry_header = response_headers.get("retry-after") + try: + # note: the spec indicates that this should only ever be an integer + # but if someone sends a float there's no reason for us to not respect it + return float(retry_header) + except (TypeError, ValueError): + pass + + # Last, try parsing `retry-after` as a date. + retry_date_tuple = email.utils.parsedate_tz(retry_header) + if retry_date_tuple is None: + return None + + retry_date = email.utils.mktime_tz(retry_date_tuple) + return float(retry_date - time.time()) + + def _calculate_retry_timeout( + self, + remaining_retries: int, + options: FinalRequestOptions, + response_headers: Optional[httpx.Headers] = None, + ) -> float: + max_retries = options.get_max_retries(self.max_retries) + + # If the API asks us to wait a certain amount of time (and it's a reasonable amount), just do what it says. + retry_after = self._parse_retry_after_header(response_headers) + if retry_after is not None and 0 < retry_after <= 60: + return retry_after + + nb_retries = max_retries - remaining_retries + + # Apply exponential backoff, but not more than the max. + sleep_seconds = min(INITIAL_RETRY_DELAY * pow(2.0, nb_retries), MAX_RETRY_DELAY) + + # Apply some jitter, plus-or-minus half a second. + jitter = 1 - 0.25 * random() + timeout = sleep_seconds * jitter + return timeout if timeout >= 0 else 0 + + def _should_retry(self, response: httpx.Response) -> bool: + # Note: this is not a standard header + should_retry_header = response.headers.get("x-should-retry") + + # If the server explicitly says whether or not to retry, obey. + if should_retry_header == "true": + log.debug("Retrying as header `x-should-retry` is set to `true`") + return True + if should_retry_header == "false": + log.debug("Not retrying as header `x-should-retry` is set to `false`") + return False + + # Retry on request timeouts. + if response.status_code == 408: + log.debug("Retrying due to status code %i", response.status_code) + return True + + # Retry on lock timeouts. + if response.status_code == 409: + log.debug("Retrying due to status code %i", response.status_code) + return True + + # Retry on rate limits. 
+ if response.status_code == 429: + log.debug("Retrying due to status code %i", response.status_code) + return True + + # Retry internal errors. + if response.status_code >= 500: + log.debug("Retrying due to status code %i", response.status_code) + return True + + log.debug("Not retrying") + return False + + def _idempotency_key(self) -> str: + return f"stainless-python-retry-{uuid.uuid4()}" + + +class _DefaultHttpxClient(httpx.Client): + def __init__(self, **kwargs: Any) -> None: + kwargs.setdefault("timeout", DEFAULT_TIMEOUT) + kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS) + kwargs.setdefault("follow_redirects", True) + super().__init__(**kwargs) + + +if TYPE_CHECKING: + DefaultHttpxClient = httpx.Client + """An alias to `httpx.Client` that provides the same defaults that this SDK + uses internally. + + This is useful because overriding the `http_client` with your own instance of + `httpx.Client` will result in httpx's defaults being used, not ours. + """ +else: + DefaultHttpxClient = _DefaultHttpxClient + + +class SyncHttpxClientWrapper(DefaultHttpxClient): + def __del__(self) -> None: + try: + self.close() + except Exception: + pass + + +class SyncAPIClient(BaseClient[httpx.Client, Stream[Any]]): + _client: httpx.Client + _default_stream_cls: type[Stream[Any]] | None = None + + def __init__( + self, + *, + version: str, + base_url: str | URL, + max_retries: int = DEFAULT_MAX_RETRIES, + timeout: float | Timeout | None | NotGiven = NOT_GIVEN, + transport: Transport | None = None, + proxies: ProxiesTypes | None = None, + limits: Limits | None = None, + http_client: httpx.Client | None = None, + custom_headers: Mapping[str, str] | None = None, + custom_query: Mapping[str, object] | None = None, + _strict_response_validation: bool, + ) -> None: + if limits is not None: + warnings.warn( + "The `connection_pool_limits` argument is deprecated. The `http_client` argument should be passed instead", + category=DeprecationWarning, + stacklevel=3, + ) + if http_client is not None: + raise ValueError( + "The `http_client` argument is mutually exclusive with `connection_pool_limits`" + ) + else: + limits = DEFAULT_CONNECTION_LIMITS + + if transport is not None: + warnings.warn( + "The `transport` argument is deprecated. The `http_client` argument should be passed instead", + category=DeprecationWarning, + stacklevel=3, + ) + if http_client is not None: + raise ValueError( + "The `http_client` argument is mutually exclusive with `transport`" + ) + + if proxies is not None: + warnings.warn( + "The `proxies` argument is deprecated. The `http_client` argument should be passed instead", + category=DeprecationWarning, + stacklevel=3, + ) + if http_client is not None: + raise ValueError( + "The `http_client` argument is mutually exclusive with `proxies`" + ) + + if not is_given(timeout): + # if the user passed in a custom http client with a non-default + # timeout set then we use that timeout. 
+ # + # note: there is an edge case here where the user passes in a client + # where they've explicitly set the timeout to match the default timeout + # as this check is structural, meaning that we'll think they didn't + # pass in a timeout and will ignore it + if http_client and http_client.timeout != HTTPX_DEFAULT_TIMEOUT: + timeout = http_client.timeout + else: + timeout = DEFAULT_TIMEOUT + + if http_client is not None and not isinstance( + http_client, httpx.Client + ): # pyright: ignore[reportUnnecessaryIsInstance] + raise TypeError( + f"Invalid `http_client` argument; Expected an instance of `httpx.Client` but got {type(http_client)}" + ) + + super().__init__( + version=version, + limits=limits, + # cast to a valid type because mypy doesn't understand our type narrowing + timeout=cast(Timeout, timeout), + proxies=proxies, + base_url=base_url, + transport=transport, + max_retries=max_retries, + custom_query=custom_query, + custom_headers=custom_headers, + _strict_response_validation=_strict_response_validation, + ) + self._client = http_client or SyncHttpxClientWrapper( + base_url=base_url, + # cast to a valid type because mypy doesn't understand our type narrowing + timeout=cast(Timeout, timeout), + proxies=proxies, + transport=transport, + limits=limits, + follow_redirects=True, + ) + + def is_closed(self) -> bool: + return self._client.is_closed + + def close(self) -> None: + """Close the underlying HTTPX client. + + The client will *not* be usable after this. + """ + # If an error is thrown while constructing a client, self._client + # may not be present + if hasattr(self, "_client"): + self._client.close() + + def __enter__(self: _T) -> _T: + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + self.close() + + def _prepare_options( + self, + options: FinalRequestOptions, # noqa: ARG002 + ) -> None: + """Hook for mutating the given options""" + return None + + def _prepare_request( + self, + request: httpx.Request, # noqa: ARG002 + ) -> None: + """This method is used as a callback for mutating the `Request` object + after it has been constructed. + This is useful for cases where you want to add certain headers based off of + the request properties, e.g. `url`, `method` etc. + """ + return None + + @overload + def request( + self, + cast_to: Type[ResponseT], + options: FinalRequestOptions, + remaining_retries: Optional[int] = None, + *, + stream: Literal[True], + stream_cls: Type[_StreamT], + ) -> _StreamT: + ... + + @overload + def request( + self, + cast_to: Type[ResponseT], + options: FinalRequestOptions, + remaining_retries: Optional[int] = None, + *, + stream: Literal[False] = False, + ) -> ResponseT: + ... + + @overload + def request( + self, + cast_to: Type[ResponseT], + options: FinalRequestOptions, + remaining_retries: Optional[int] = None, + *, + stream: bool = False, + stream_cls: Type[_StreamT] | None = None, + ) -> ResponseT | _StreamT: + ... 
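Since the retry scheduling above is spread across `_parse_retry_after_header`, `_calculate_retry_timeout` and `_should_retry`, the following standalone sketch condenses the resulting timing for reviewers. It is illustrative only and not part of the patch: the 0.5 s initial delay and 8 s cap are assumed to match the vendored `_constants.py` (not shown in this hunk), and the server-driven `Retry-After` short-circuit is summarised in a comment rather than reimplemented.

```python
from random import random

INITIAL_RETRY_DELAY = 0.5  # assumed value from the vendored _constants.py
MAX_RETRY_DELAY = 8.0      # assumed value from the vendored _constants.py

def retry_timeout(max_retries: int, remaining_retries: int) -> float:
    """Mirror of _calculate_retry_timeout, minus the Retry-After short-circuit."""
    nb_retries = max_retries - remaining_retries
    sleep_seconds = min(INITIAL_RETRY_DELAY * pow(2.0, nb_retries), MAX_RETRY_DELAY)
    jitter = 1 - 0.25 * random()  # shaves up to 25% off the delay
    timeout = sleep_seconds * jitter
    return timeout if timeout >= 0 else 0

# With the SDK default of two retries, the waits come out to roughly 1 s and
# then 2 s (each reduced by the jitter), unless the response carried a usable
# `retry-after-ms` / `retry-after` value between 0 and 60 seconds, in which
# case that value is used directly.
```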
+ + def request( + self, + cast_to: Type[ResponseT], + options: FinalRequestOptions, + remaining_retries: Optional[int] = None, + *, + stream: bool = False, + stream_cls: type[_StreamT] | None = None, + ) -> ResponseT | _StreamT: + return self._request( + cast_to=cast_to, + options=options, + stream=stream, + stream_cls=stream_cls, + remaining_retries=remaining_retries, + ) + + def _request( + self, + *, + cast_to: Type[ResponseT], + options: FinalRequestOptions, + remaining_retries: int | None, + stream: bool, + stream_cls: type[_StreamT] | None, + ) -> ResponseT | _StreamT: + cast_to = self._maybe_override_cast_to(cast_to, options) + self._prepare_options(options) + + retries = self._remaining_retries(remaining_retries, options) + request = self._build_request(options) + self._prepare_request(request) + + kwargs: HttpxSendArgs = {} + if self.custom_auth is not None: + kwargs["auth"] = self.custom_auth + + log.debug("Sending HTTP Request: %s %s", request.method, request.url) + + try: + response = self._client.send( + request, + stream=stream or self._should_stream_response_body(request=request), + **kwargs, + ) + except httpx.TimeoutException as err: + log.debug("Encountered httpx.TimeoutException", exc_info=True) + + if retries > 0: + return self._retry_request( + options, + cast_to, + retries, + stream=stream, + stream_cls=stream_cls, + response_headers=None, + ) + + log.debug("Raising timeout error") + raise APITimeoutError(request=request) from err + except Exception as err: + log.debug("Encountered Exception", exc_info=True) + + if retries > 0: + return self._retry_request( + options, + cast_to, + retries, + stream=stream, + stream_cls=stream_cls, + response_headers=None, + ) + + log.debug("Raising connection error") + raise APIConnectionError(request=request) from err + + log.debug( + 'HTTP Response: %s %s "%i %s" %s', + request.method, + request.url, + response.status_code, + response.reason_phrase, + response.headers, + ) + log.debug("request_id: %s", response.headers.get("x-request-id")) + + try: + response.raise_for_status() + except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code + log.debug("Encountered httpx.HTTPStatusError", exc_info=True) + + if retries > 0 and self._should_retry(err.response): + err.response.close() + return self._retry_request( + options, + cast_to, + retries, + err.response.headers, + stream=stream, + stream_cls=stream_cls, + ) + + # If the response is streamed then we need to explicitly read the response + # to completion before attempting to access the response text. + if not err.response.is_closed: + err.response.read() + + log.debug("Re-raising status error") + raise self._make_status_error_from_response(err.response) from None + + return self._process_response( + cast_to=cast_to, + options=options, + response=response, + stream=stream, + stream_cls=stream_cls, + ) + + def _retry_request( + self, + options: FinalRequestOptions, + cast_to: Type[ResponseT], + remaining_retries: int, + response_headers: httpx.Headers | None, + *, + stream: bool, + stream_cls: type[_StreamT] | None, + ) -> ResponseT | _StreamT: + remaining = remaining_retries - 1 + if remaining == 1: + log.debug("1 retry left") + else: + log.debug("%i retries left", remaining) + + timeout = self._calculate_retry_timeout(remaining, options, response_headers) + log.info("Retrying request to %s in %f seconds", options.url, timeout) + + # In a synchronous context we are blocking the entire thread. 
Up to the library user to run the client in a + # different thread if necessary. + time.sleep(timeout) + + return self._request( + options=options, + cast_to=cast_to, + remaining_retries=remaining, + stream=stream, + stream_cls=stream_cls, + ) + + def _process_response( + self, + *, + cast_to: Type[ResponseT], + options: FinalRequestOptions, + response: httpx.Response, + stream: bool, + stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None, + ) -> ResponseT: + if response.request.headers.get(RAW_RESPONSE_HEADER) == "true": + return cast( + ResponseT, + LegacyAPIResponse( + raw=response, + client=self, + cast_to=cast_to, + stream=stream, + stream_cls=stream_cls, + options=options, + ), + ) + + origin = get_origin(cast_to) or cast_to + + if inspect.isclass(origin) and issubclass(origin, BaseAPIResponse): + if not issubclass(origin, APIResponse): + raise TypeError( + f"API Response types must subclass {APIResponse}; Received {origin}" + ) + + response_cls = cast("type[BaseAPIResponse[Any]]", cast_to) + return cast( + ResponseT, + response_cls( + raw=response, + client=self, + cast_to=extract_response_type(response_cls), + stream=stream, + stream_cls=stream_cls, + options=options, + ), + ) + + if cast_to == httpx.Response: + return cast(ResponseT, response) + + api_response = APIResponse( + raw=response, + client=self, + cast_to=cast( + "type[ResponseT]", cast_to + ), # pyright: ignore[reportUnnecessaryCast] + stream=stream, + stream_cls=stream_cls, + options=options, + ) + if bool(response.request.headers.get(RAW_RESPONSE_HEADER)): + return cast(ResponseT, api_response) + + return api_response.parse() + + def _request_api_list( + self, + model: Type[object], + page: Type[SyncPageT], + options: FinalRequestOptions, + ) -> SyncPageT: + def _parser(resp: SyncPageT) -> SyncPageT: + resp._set_private_attributes( + client=self, + model=model, + options=options, + ) + return resp + + options.post_parser = _parser + + return self.request(page, options, stream=False) + + @overload + def get( + self, + path: str, + *, + cast_to: Type[ResponseT], + options: RequestOptions = {}, + stream: Literal[False] = False, + ) -> ResponseT: + ... + + @overload + def get( + self, + path: str, + *, + cast_to: Type[ResponseT], + options: RequestOptions = {}, + stream: Literal[True], + stream_cls: type[_StreamT], + ) -> _StreamT: + ... + + @overload + def get( + self, + path: str, + *, + cast_to: Type[ResponseT], + options: RequestOptions = {}, + stream: bool, + stream_cls: type[_StreamT] | None = None, + ) -> ResponseT | _StreamT: + ... + + def get( + self, + path: str, + *, + cast_to: Type[ResponseT], + options: RequestOptions = {}, + stream: bool = False, + stream_cls: type[_StreamT] | None = None, + ) -> ResponseT | _StreamT: + opts = FinalRequestOptions.construct(method="get", url=path, **options) + # cast is required because mypy complains about returning Any even though + # it understands the type variables + return cast( + ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls) + ) + + @overload + def post( + self, + path: str, + *, + cast_to: Type[ResponseT], + body: Body | None = None, + options: RequestOptions = {}, + files: RequestFiles | None = None, + stream: Literal[False] = False, + ) -> ResponseT: + ... + + @overload + def post( + self, + path: str, + *, + cast_to: Type[ResponseT], + body: Body | None = None, + options: RequestOptions = {}, + files: RequestFiles | None = None, + stream: Literal[True], + stream_cls: type[_StreamT], + ) -> _StreamT: + ... 
+ + @overload + def post( + self, + path: str, + *, + cast_to: Type[ResponseT], + body: Body | None = None, + options: RequestOptions = {}, + files: RequestFiles | None = None, + stream: bool, + stream_cls: type[_StreamT] | None = None, + ) -> ResponseT | _StreamT: + ... + + def post( + self, + path: str, + *, + cast_to: Type[ResponseT], + body: Body | None = None, + options: RequestOptions = {}, + files: RequestFiles | None = None, + stream: bool = False, + stream_cls: type[_StreamT] | None = None, + ) -> ResponseT | _StreamT: + opts = FinalRequestOptions.construct( + method="post", + url=path, + json_data=body, + files=to_httpx_files(files), + **options, + ) + return cast( + ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls) + ) + + def patch( + self, + path: str, + *, + cast_to: Type[ResponseT], + body: Body | None = None, + options: RequestOptions = {}, + ) -> ResponseT: + opts = FinalRequestOptions.construct( + method="patch", url=path, json_data=body, **options + ) + return self.request(cast_to, opts) + + def put( + self, + path: str, + *, + cast_to: Type[ResponseT], + body: Body | None = None, + files: RequestFiles | None = None, + options: RequestOptions = {}, + ) -> ResponseT: + opts = FinalRequestOptions.construct( + method="put", + url=path, + json_data=body, + files=to_httpx_files(files), + **options, + ) + return self.request(cast_to, opts) + + def delete( + self, + path: str, + *, + cast_to: Type[ResponseT], + body: Body | None = None, + options: RequestOptions = {}, + ) -> ResponseT: + opts = FinalRequestOptions.construct( + method="delete", url=path, json_data=body, **options + ) + return self.request(cast_to, opts) + + def get_api_list( + self, + path: str, + *, + model: Type[object], + page: Type[SyncPageT], + body: Body | None = None, + options: RequestOptions = {}, + method: str = "get", + ) -> SyncPageT: + opts = FinalRequestOptions.construct( + method=method, url=path, json_data=body, **options + ) + return self._request_api_list(model, page, opts) + + +class _DefaultAsyncHttpxClient(httpx.AsyncClient): + def __init__(self, **kwargs: Any) -> None: + kwargs.setdefault("timeout", DEFAULT_TIMEOUT) + kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS) + kwargs.setdefault("follow_redirects", True) + super().__init__(**kwargs) + + +if TYPE_CHECKING: + DefaultAsyncHttpxClient = httpx.AsyncClient + """An alias to `httpx.AsyncClient` that provides the same defaults that this SDK + uses internally. + + This is useful because overriding the `http_client` with your own instance of + `httpx.AsyncClient` will result in httpx's defaults being used, not ours. 
+ """ +else: + DefaultAsyncHttpxClient = _DefaultAsyncHttpxClient + + +class AsyncHttpxClientWrapper(DefaultAsyncHttpxClient): + def __del__(self) -> None: + try: + # TODO(someday): support non asyncio runtimes here + asyncio.get_running_loop().create_task(self.aclose()) + except Exception: + pass + + +class AsyncAPIClient(BaseClient[httpx.AsyncClient, AsyncStream[Any]]): + _client: httpx.AsyncClient + _default_stream_cls: type[AsyncStream[Any]] | None = None + + def __init__( + self, + *, + version: str, + base_url: str | URL, + _strict_response_validation: bool, + max_retries: int = DEFAULT_MAX_RETRIES, + timeout: float | Timeout | None | NotGiven = NOT_GIVEN, + transport: AsyncTransport | None = None, + proxies: ProxiesTypes | None = None, + limits: Limits | None = None, + http_client: httpx.AsyncClient | None = None, + custom_headers: Mapping[str, str] | None = None, + custom_query: Mapping[str, object] | None = None, + ) -> None: + if limits is not None: + warnings.warn( + "The `connection_pool_limits` argument is deprecated. The `http_client` argument should be passed instead", + category=DeprecationWarning, + stacklevel=3, + ) + if http_client is not None: + raise ValueError( + "The `http_client` argument is mutually exclusive with `connection_pool_limits`" + ) + else: + limits = DEFAULT_CONNECTION_LIMITS + + if transport is not None: + warnings.warn( + "The `transport` argument is deprecated. The `http_client` argument should be passed instead", + category=DeprecationWarning, + stacklevel=3, + ) + if http_client is not None: + raise ValueError( + "The `http_client` argument is mutually exclusive with `transport`" + ) + + if proxies is not None: + warnings.warn( + "The `proxies` argument is deprecated. The `http_client` argument should be passed instead", + category=DeprecationWarning, + stacklevel=3, + ) + if http_client is not None: + raise ValueError( + "The `http_client` argument is mutually exclusive with `proxies`" + ) + + if not is_given(timeout): + # if the user passed in a custom http client with a non-default + # timeout set then we use that timeout. 
+ # + # note: there is an edge case here where the user passes in a client + # where they've explicitly set the timeout to match the default timeout + # as this check is structural, meaning that we'll think they didn't + # pass in a timeout and will ignore it + if http_client and http_client.timeout != HTTPX_DEFAULT_TIMEOUT: + timeout = http_client.timeout + else: + timeout = DEFAULT_TIMEOUT + + if http_client is not None and not isinstance( + http_client, httpx.AsyncClient + ): # pyright: ignore[reportUnnecessaryIsInstance] + raise TypeError( + f"Invalid `http_client` argument; Expected an instance of `httpx.AsyncClient` but got {type(http_client)}" + ) + + super().__init__( + version=version, + base_url=base_url, + limits=limits, + # cast to a valid type because mypy doesn't understand our type narrowing + timeout=cast(Timeout, timeout), + proxies=proxies, + transport=transport, + max_retries=max_retries, + custom_query=custom_query, + custom_headers=custom_headers, + _strict_response_validation=_strict_response_validation, + ) + self._client = http_client or AsyncHttpxClientWrapper( + base_url=base_url, + # cast to a valid type because mypy doesn't understand our type narrowing + timeout=cast(Timeout, timeout), + proxies=proxies, + transport=transport, + limits=limits, + follow_redirects=True, + ) + + def is_closed(self) -> bool: + return self._client.is_closed + + async def close(self) -> None: + """Close the underlying HTTPX client. + + The client will *not* be usable after this. + """ + await self._client.aclose() + + async def __aenter__(self: _T) -> _T: + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + await self.close() + + async def _prepare_options( + self, + options: FinalRequestOptions, # noqa: ARG002 + ) -> None: + """Hook for mutating the given options""" + return None + + async def _prepare_request( + self, + request: httpx.Request, # noqa: ARG002 + ) -> None: + """This method is used as a callback for mutating the `Request` object + after it has been constructed. + This is useful for cases where you want to add certain headers based off of + the request properties, e.g. `url`, `method` etc. + """ + return None + + @overload + async def request( + self, + cast_to: Type[ResponseT], + options: FinalRequestOptions, + *, + stream: Literal[False] = False, + remaining_retries: Optional[int] = None, + ) -> ResponseT: + ... + + @overload + async def request( + self, + cast_to: Type[ResponseT], + options: FinalRequestOptions, + *, + stream: Literal[True], + stream_cls: type[_AsyncStreamT], + remaining_retries: Optional[int] = None, + ) -> _AsyncStreamT: + ... + + @overload + async def request( + self, + cast_to: Type[ResponseT], + options: FinalRequestOptions, + *, + stream: bool, + stream_cls: type[_AsyncStreamT] | None = None, + remaining_retries: Optional[int] = None, + ) -> ResponseT | _AsyncStreamT: + ... 
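As with the synchronous client, the async constructor above only falls back to `DEFAULT_TIMEOUT` when neither an explicit `timeout` nor a custom `http_client` carrying a non-default timeout is supplied. A minimal usage sketch of that inference follows; it assumes `AsyncOpenAI` and `DefaultAsyncHttpxClient` are re-exported from the vendored package root as in the upstream SDK, and that `OPENAI_API_KEY` is set in the environment.

```python
import httpx
from portkey_ai._vendor.openai import AsyncOpenAI, DefaultAsyncHttpxClient

# No `timeout=` is passed to the SDK client below; because the supplied httpx
# client carries a non-default timeout, the constructor adopts it (30 s here)
# instead of DEFAULT_TIMEOUT. The import path is an assumption based on the
# vendoring layout introduced by this patch.
client = AsyncOpenAI(
    http_client=DefaultAsyncHttpxClient(timeout=httpx.Timeout(30.0)),
)

# Caveat noted in the comment above: a custom client whose timeout equals
# httpx's default is structurally indistinguishable from one with no timeout
# set, so it would be ignored and DEFAULT_TIMEOUT used instead.
```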
+ + async def request( + self, + cast_to: Type[ResponseT], + options: FinalRequestOptions, + *, + stream: bool = False, + stream_cls: type[_AsyncStreamT] | None = None, + remaining_retries: Optional[int] = None, + ) -> ResponseT | _AsyncStreamT: + return await self._request( + cast_to=cast_to, + options=options, + stream=stream, + stream_cls=stream_cls, + remaining_retries=remaining_retries, + ) + + async def _request( + self, + cast_to: Type[ResponseT], + options: FinalRequestOptions, + *, + stream: bool, + stream_cls: type[_AsyncStreamT] | None, + remaining_retries: int | None, + ) -> ResponseT | _AsyncStreamT: + cast_to = self._maybe_override_cast_to(cast_to, options) + await self._prepare_options(options) + + retries = self._remaining_retries(remaining_retries, options) + request = self._build_request(options) + await self._prepare_request(request) + + kwargs: HttpxSendArgs = {} + if self.custom_auth is not None: + kwargs["auth"] = self.custom_auth + + try: + response = await self._client.send( + request, + stream=stream or self._should_stream_response_body(request=request), + **kwargs, + ) + except httpx.TimeoutException as err: + log.debug("Encountered httpx.TimeoutException", exc_info=True) + + if retries > 0: + return await self._retry_request( + options, + cast_to, + retries, + stream=stream, + stream_cls=stream_cls, + response_headers=None, + ) + + log.debug("Raising timeout error") + raise APITimeoutError(request=request) from err + except Exception as err: + log.debug("Encountered Exception", exc_info=True) + + if retries > 0: + return await self._retry_request( + options, + cast_to, + retries, + stream=stream, + stream_cls=stream_cls, + response_headers=None, + ) + + log.debug("Raising connection error") + raise APIConnectionError(request=request) from err + + log.debug( + 'HTTP Request: %s %s "%i %s"', + request.method, + request.url, + response.status_code, + response.reason_phrase, + ) + + try: + response.raise_for_status() + except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code + log.debug("Encountered httpx.HTTPStatusError", exc_info=True) + + if retries > 0 and self._should_retry(err.response): + await err.response.aclose() + return await self._retry_request( + options, + cast_to, + retries, + err.response.headers, + stream=stream, + stream_cls=stream_cls, + ) + + # If the response is streamed then we need to explicitly read the response + # to completion before attempting to access the response text. 
+ if not err.response.is_closed: + await err.response.aread() + + log.debug("Re-raising status error") + raise self._make_status_error_from_response(err.response) from None + + return await self._process_response( + cast_to=cast_to, + options=options, + response=response, + stream=stream, + stream_cls=stream_cls, + ) + + async def _retry_request( + self, + options: FinalRequestOptions, + cast_to: Type[ResponseT], + remaining_retries: int, + response_headers: httpx.Headers | None, + *, + stream: bool, + stream_cls: type[_AsyncStreamT] | None, + ) -> ResponseT | _AsyncStreamT: + remaining = remaining_retries - 1 + if remaining == 1: + log.debug("1 retry left") + else: + log.debug("%i retries left", remaining) + + timeout = self._calculate_retry_timeout(remaining, options, response_headers) + log.info("Retrying request to %s in %f seconds", options.url, timeout) + + await anyio.sleep(timeout) + + return await self._request( + options=options, + cast_to=cast_to, + remaining_retries=remaining, + stream=stream, + stream_cls=stream_cls, + ) + + async def _process_response( + self, + *, + cast_to: Type[ResponseT], + options: FinalRequestOptions, + response: httpx.Response, + stream: bool, + stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None, + ) -> ResponseT: + if response.request.headers.get(RAW_RESPONSE_HEADER) == "true": + return cast( + ResponseT, + LegacyAPIResponse( + raw=response, + client=self, + cast_to=cast_to, + stream=stream, + stream_cls=stream_cls, + options=options, + ), + ) + + origin = get_origin(cast_to) or cast_to + + if inspect.isclass(origin) and issubclass(origin, BaseAPIResponse): + if not issubclass(origin, AsyncAPIResponse): + raise TypeError( + f"API Response types must subclass {AsyncAPIResponse}; Received {origin}" + ) + + response_cls = cast("type[BaseAPIResponse[Any]]", cast_to) + return cast( + "ResponseT", + response_cls( + raw=response, + client=self, + cast_to=extract_response_type(response_cls), + stream=stream, + stream_cls=stream_cls, + options=options, + ), + ) + + if cast_to == httpx.Response: + return cast(ResponseT, response) + + api_response = AsyncAPIResponse( + raw=response, + client=self, + cast_to=cast( + "type[ResponseT]", cast_to + ), # pyright: ignore[reportUnnecessaryCast] + stream=stream, + stream_cls=stream_cls, + options=options, + ) + if bool(response.request.headers.get(RAW_RESPONSE_HEADER)): + return cast(ResponseT, api_response) + + return await api_response.parse() + + def _request_api_list( + self, + model: Type[_T], + page: Type[AsyncPageT], + options: FinalRequestOptions, + ) -> AsyncPaginator[_T, AsyncPageT]: + return AsyncPaginator(client=self, options=options, page_cls=page, model=model) + + @overload + async def get( + self, + path: str, + *, + cast_to: Type[ResponseT], + options: RequestOptions = {}, + stream: Literal[False] = False, + ) -> ResponseT: + ... + + @overload + async def get( + self, + path: str, + *, + cast_to: Type[ResponseT], + options: RequestOptions = {}, + stream: Literal[True], + stream_cls: type[_AsyncStreamT], + ) -> _AsyncStreamT: + ... + + @overload + async def get( + self, + path: str, + *, + cast_to: Type[ResponseT], + options: RequestOptions = {}, + stream: bool, + stream_cls: type[_AsyncStreamT] | None = None, + ) -> ResponseT | _AsyncStreamT: + ... 
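Because the async `_request_api_list` above returns an `AsyncPaginator` rather than performing the request eagerly, list endpoints can either be awaited for a single page or iterated to walk every page (via `BaseAsyncPage.__aiter__` earlier in this file). A usage sketch follows, assuming an `AsyncOpenAI` client built from this vendored SDK and using `fine_tuning.jobs.list()` purely as an example of a paginated endpoint.

```python
# Illustrative only; `client` is an AsyncOpenAI instance from this vendored SDK,
# and the snippet is assumed to run inside an async function.

# Awaiting the call resolves the AsyncPaginator into just the first page.
page = await client.fine_tuning.jobs.list(limit=10)
print(len(page.data), page.has_next_page())

# Iterating the same call instead follows `next_page_info()` page by page and
# yields individual items until the API reports no further pages.
async for job in client.fine_tuning.jobs.list(limit=10):
    print(job.id)
```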
+ + async def get( + self, + path: str, + *, + cast_to: Type[ResponseT], + options: RequestOptions = {}, + stream: bool = False, + stream_cls: type[_AsyncStreamT] | None = None, + ) -> ResponseT | _AsyncStreamT: + opts = FinalRequestOptions.construct(method="get", url=path, **options) + return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls) + + @overload + async def post( + self, + path: str, + *, + cast_to: Type[ResponseT], + body: Body | None = None, + files: RequestFiles | None = None, + options: RequestOptions = {}, + stream: Literal[False] = False, + ) -> ResponseT: + ... + + @overload + async def post( + self, + path: str, + *, + cast_to: Type[ResponseT], + body: Body | None = None, + files: RequestFiles | None = None, + options: RequestOptions = {}, + stream: Literal[True], + stream_cls: type[_AsyncStreamT], + ) -> _AsyncStreamT: + ... + + @overload + async def post( + self, + path: str, + *, + cast_to: Type[ResponseT], + body: Body | None = None, + files: RequestFiles | None = None, + options: RequestOptions = {}, + stream: bool, + stream_cls: type[_AsyncStreamT] | None = None, + ) -> ResponseT | _AsyncStreamT: + ... + + async def post( + self, + path: str, + *, + cast_to: Type[ResponseT], + body: Body | None = None, + files: RequestFiles | None = None, + options: RequestOptions = {}, + stream: bool = False, + stream_cls: type[_AsyncStreamT] | None = None, + ) -> ResponseT | _AsyncStreamT: + opts = FinalRequestOptions.construct( + method="post", + url=path, + json_data=body, + files=await async_to_httpx_files(files), + **options, + ) + return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls) + + async def patch( + self, + path: str, + *, + cast_to: Type[ResponseT], + body: Body | None = None, + options: RequestOptions = {}, + ) -> ResponseT: + opts = FinalRequestOptions.construct( + method="patch", url=path, json_data=body, **options + ) + return await self.request(cast_to, opts) + + async def put( + self, + path: str, + *, + cast_to: Type[ResponseT], + body: Body | None = None, + files: RequestFiles | None = None, + options: RequestOptions = {}, + ) -> ResponseT: + opts = FinalRequestOptions.construct( + method="put", + url=path, + json_data=body, + files=await async_to_httpx_files(files), + **options, + ) + return await self.request(cast_to, opts) + + async def delete( + self, + path: str, + *, + cast_to: Type[ResponseT], + body: Body | None = None, + options: RequestOptions = {}, + ) -> ResponseT: + opts = FinalRequestOptions.construct( + method="delete", url=path, json_data=body, **options + ) + return await self.request(cast_to, opts) + + def get_api_list( + self, + path: str, + *, + model: Type[_T], + page: Type[AsyncPageT], + body: Body | None = None, + options: RequestOptions = {}, + method: str = "get", + ) -> AsyncPaginator[_T, AsyncPageT]: + opts = FinalRequestOptions.construct( + method=method, url=path, json_data=body, **options + ) + return self._request_api_list(model, page, opts) + + +def make_request_options( + *, + query: Query | None = None, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + idempotency_key: str | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + post_parser: PostParser | NotGiven = NOT_GIVEN, +) -> RequestOptions: + """Create a dict of type RequestOptions without keys of NotGiven values.""" + options: RequestOptions = {} + if extra_headers is not None: + options["headers"] = extra_headers + + if extra_body is 
not None: + options["extra_json"] = cast(AnyMapping, extra_body) + + if query is not None: + options["params"] = query + + if extra_query is not None: + options["params"] = {**options.get("params", {}), **extra_query} + + if not isinstance(timeout, NotGiven): + options["timeout"] = timeout + + if idempotency_key is not None: + options["idempotency_key"] = idempotency_key + + if is_given(post_parser): + # internal + options["post_parser"] = post_parser # type: ignore + + return options + + +class OtherPlatform: + def __init__(self, name: str) -> None: + self.name = name + + @override + def __str__(self) -> str: + return f"Other:{self.name}" + + +Platform = Union[ + OtherPlatform, + Literal[ + "MacOS", + "Linux", + "Windows", + "FreeBSD", + "OpenBSD", + "iOS", + "Android", + "Unknown", + ], +] + + +def get_platform() -> Platform: + try: + system = platform.system().lower() + platform_name = platform.platform().lower() + except Exception: + return "Unknown" + + if "iphone" in platform_name or "ipad" in platform_name: + # Tested using Python3IDE on an iPhone 11 and Pythonista on an iPad 7 + # system is Darwin and platform_name is a string like: + # - Darwin-21.6.0-iPhone12,1-64bit + # - Darwin-21.6.0-iPad7,11-64bit + return "iOS" + + if system == "darwin": + return "MacOS" + + if system == "windows": + return "Windows" + + if "android" in platform_name: + # Tested using Pydroid 3 + # system is Linux and platform_name is a string like 'Linux-5.10.81-android12-9-00001-geba40aecb3b7-ab8534902-aarch64-with-libc' + return "Android" + + if system == "linux": + # https://distro.readthedocs.io/en/latest/#distro.id + distro_id = distro.id() + if distro_id == "freebsd": + return "FreeBSD" + + if distro_id == "openbsd": + return "OpenBSD" + + return "Linux" + + if platform_name: + return OtherPlatform(platform_name) + + return "Unknown" + + +@lru_cache(maxsize=None) +def platform_headers(version: str) -> Dict[str, str]: + return { + "X-Stainless-Lang": "python", + "X-Stainless-Package-Version": version, + "X-Stainless-OS": str(get_platform()), + "X-Stainless-Arch": str(get_architecture()), + "X-Stainless-Runtime": get_python_runtime(), + "X-Stainless-Runtime-Version": get_python_version(), + } + + +class OtherArch: + def __init__(self, name: str) -> None: + self.name = name + + @override + def __str__(self) -> str: + return f"other:{self.name}" + + +Arch = Union[OtherArch, Literal["x32", "x64", "arm", "arm64", "unknown"]] + + +def get_python_runtime() -> str: + try: + return platform.python_implementation() + except Exception: + return "unknown" + + +def get_python_version() -> str: + try: + return platform.python_version() + except Exception: + return "unknown" + + +def get_architecture() -> Arch: + try: + python_bitness, _ = platform.architecture() + machine = platform.machine().lower() + except Exception: + return "unknown" + + if machine in ("arm64", "aarch64"): + return "arm64" + + # TODO: untested + if machine == "arm": + return "arm" + + if machine == "x86_64": + return "x64" + + # TODO: untested + if python_bitness == "32bit": + return "x32" + + if machine: + return OtherArch(machine) + + return "unknown" + + +def _merge_mappings( + obj1: Mapping[_T_co, Union[_T, Omit]], + obj2: Mapping[_T_co, Union[_T, Omit]], +) -> Dict[_T_co, _T]: + """Merge two mappings of the same type, removing any values that are instances of `Omit`. + + In cases with duplicate keys the second mapping takes precedence. 
+ """ + merged = {**obj1, **obj2} + return {key: value for key, value in merged.items() if not isinstance(value, Omit)} diff --git a/portkey_ai/_vendor/openai/_client.py b/portkey_ai/_vendor/openai/_client.py new file mode 100644 index 00000000..27e48060 --- /dev/null +++ b/portkey_ai/_vendor/openai/_client.py @@ -0,0 +1,575 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, Union, Mapping +from typing_extensions import Self, override + +import httpx + +from . import resources, _exceptions +from ._qs import Querystring +from ._types import ( + NOT_GIVEN, + Omit, + Timeout, + NotGiven, + Transport, + ProxiesTypes, + RequestOptions, +) +from ._utils import ( + is_given, + is_mapping, + get_async_library, +) +from ._version import __version__ +from ._streaming import Stream as Stream, AsyncStream as AsyncStream +from ._exceptions import OpenAIError, APIStatusError +from ._base_client import ( + DEFAULT_MAX_RETRIES, + SyncAPIClient, + AsyncAPIClient, +) + +__all__ = [ + "Timeout", + "Transport", + "ProxiesTypes", + "RequestOptions", + "resources", + "OpenAI", + "AsyncOpenAI", + "Client", + "AsyncClient", +] + + +class OpenAI(SyncAPIClient): + completions: resources.Completions + chat: resources.Chat + embeddings: resources.Embeddings + files: resources.Files + images: resources.Images + audio: resources.Audio + moderations: resources.Moderations + models: resources.Models + fine_tuning: resources.FineTuning + beta: resources.Beta + batches: resources.Batches + with_raw_response: OpenAIWithRawResponse + with_streaming_response: OpenAIWithStreamedResponse + + # client options + api_key: str + organization: str | None + project: str | None + + def __init__( + self, + *, + api_key: str | None = None, + organization: str | None = None, + project: str | None = None, + base_url: str | httpx.URL | None = None, + timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, + max_retries: int = DEFAULT_MAX_RETRIES, + default_headers: Mapping[str, str] | None = None, + default_query: Mapping[str, object] | None = None, + # Configure a custom httpx client. + # We provide a `DefaultHttpxClient` class that you can pass to retain the default values we use for `limits`, `timeout` & `follow_redirects`. + # See the [httpx documentation](https://www.python-httpx.org/api/#client) for more details. + http_client: httpx.Client | None = None, + # Enable or disable schema validation for data returned by the API. + # When enabled an error APIResponseValidationError is raised + # if the API responds with invalid data for the expected schema. + # + # This parameter may be removed or changed in the future. + # If you rely on this feature, please open a GitHub issue + # outlining your use-case to help us decide if it should be + # part of our public interface in the future. + _strict_response_validation: bool = False, + ) -> None: + """Construct a new synchronous openai client instance. 
+ + This automatically infers the following arguments from their corresponding environment variables if they are not provided: + - `api_key` from `OPENAI_API_KEY` + - `organization` from `OPENAI_ORG_ID` + - `project` from `OPENAI_PROJECT_ID` + """ + if api_key is None: + api_key = os.environ.get("OPENAI_API_KEY") + if api_key is None: + raise OpenAIError( + "The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable" + ) + self.api_key = api_key + + if organization is None: + organization = os.environ.get("OPENAI_ORG_ID") + self.organization = organization + + if project is None: + project = os.environ.get("OPENAI_PROJECT_ID") + self.project = project + + if base_url is None: + base_url = os.environ.get("OPENAI_BASE_URL") + if base_url is None: + base_url = f"https://api.openai.com/v1" + + super().__init__( + version=__version__, + base_url=base_url, + max_retries=max_retries, + timeout=timeout, + http_client=http_client, + custom_headers=default_headers, + custom_query=default_query, + _strict_response_validation=_strict_response_validation, + ) + + self._default_stream_cls = Stream + + self.completions = resources.Completions(self) + self.chat = resources.Chat(self) + self.embeddings = resources.Embeddings(self) + self.files = resources.Files(self) + self.images = resources.Images(self) + self.audio = resources.Audio(self) + self.moderations = resources.Moderations(self) + self.models = resources.Models(self) + self.fine_tuning = resources.FineTuning(self) + self.beta = resources.Beta(self) + self.batches = resources.Batches(self) + self.with_raw_response = OpenAIWithRawResponse(self) + self.with_streaming_response = OpenAIWithStreamedResponse(self) + + @property + @override + def qs(self) -> Querystring: + return Querystring(array_format="comma") + + @property + @override + def auth_headers(self) -> dict[str, str]: + api_key = self.api_key + return {"Authorization": f"Bearer {api_key}"} + + @property + @override + def default_headers(self) -> dict[str, str | Omit]: + return { + **super().default_headers, + "X-Stainless-Async": "false", + "OpenAI-Organization": self.organization + if self.organization is not None + else Omit(), + "OpenAI-Project": self.project if self.project is not None else Omit(), + **self._custom_headers, + } + + def copy( + self, + *, + api_key: str | None = None, + organization: str | None = None, + project: str | None = None, + base_url: str | httpx.URL | None = None, + timeout: float | Timeout | None | NotGiven = NOT_GIVEN, + http_client: httpx.Client | None = None, + max_retries: int | NotGiven = NOT_GIVEN, + default_headers: Mapping[str, str] | None = None, + set_default_headers: Mapping[str, str] | None = None, + default_query: Mapping[str, object] | None = None, + set_default_query: Mapping[str, object] | None = None, + _extra_kwargs: Mapping[str, Any] = {}, + ) -> Self: + """ + Create a new client instance re-using the same options given to the current client with optional overriding. 
+ """ + if default_headers is not None and set_default_headers is not None: + raise ValueError( + "The `default_headers` and `set_default_headers` arguments are mutually exclusive" + ) + + if default_query is not None and set_default_query is not None: + raise ValueError( + "The `default_query` and `set_default_query` arguments are mutually exclusive" + ) + + headers = self._custom_headers + if default_headers is not None: + headers = {**headers, **default_headers} + elif set_default_headers is not None: + headers = set_default_headers + + params = self._custom_query + if default_query is not None: + params = {**params, **default_query} + elif set_default_query is not None: + params = set_default_query + + http_client = http_client or self._client + return self.__class__( + api_key=api_key or self.api_key, + organization=organization or self.organization, + project=project or self.project, + base_url=base_url or self.base_url, + timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, + http_client=http_client, + max_retries=max_retries if is_given(max_retries) else self.max_retries, + default_headers=headers, + default_query=params, + **_extra_kwargs, + ) + + # Alias for `copy` for nicer inline usage, e.g. + # client.with_options(timeout=10).foo.create(...) + with_options = copy + + @override + def _make_status_error( + self, + err_msg: str, + *, + body: object, + response: httpx.Response, + ) -> APIStatusError: + data = body.get("error", body) if is_mapping(body) else body + if response.status_code == 400: + return _exceptions.BadRequestError(err_msg, response=response, body=data) + + if response.status_code == 401: + return _exceptions.AuthenticationError( + err_msg, response=response, body=data + ) + + if response.status_code == 403: + return _exceptions.PermissionDeniedError( + err_msg, response=response, body=data + ) + + if response.status_code == 404: + return _exceptions.NotFoundError(err_msg, response=response, body=data) + + if response.status_code == 409: + return _exceptions.ConflictError(err_msg, response=response, body=data) + + if response.status_code == 422: + return _exceptions.UnprocessableEntityError( + err_msg, response=response, body=data + ) + + if response.status_code == 429: + return _exceptions.RateLimitError(err_msg, response=response, body=data) + + if response.status_code >= 500: + return _exceptions.InternalServerError( + err_msg, response=response, body=data + ) + return APIStatusError(err_msg, response=response, body=data) + + +class AsyncOpenAI(AsyncAPIClient): + completions: resources.AsyncCompletions + chat: resources.AsyncChat + embeddings: resources.AsyncEmbeddings + files: resources.AsyncFiles + images: resources.AsyncImages + audio: resources.AsyncAudio + moderations: resources.AsyncModerations + models: resources.AsyncModels + fine_tuning: resources.AsyncFineTuning + beta: resources.AsyncBeta + batches: resources.AsyncBatches + with_raw_response: AsyncOpenAIWithRawResponse + with_streaming_response: AsyncOpenAIWithStreamedResponse + + # client options + api_key: str + organization: str | None + project: str | None + + def __init__( + self, + *, + api_key: str | None = None, + organization: str | None = None, + project: str | None = None, + base_url: str | httpx.URL | None = None, + timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, + max_retries: int = DEFAULT_MAX_RETRIES, + default_headers: Mapping[str, str] | None = None, + default_query: Mapping[str, object] | None = None, + # Configure a custom httpx client. 
+ # We provide a `DefaultAsyncHttpxClient` class that you can pass to retain the default values we use for `limits`, `timeout` & `follow_redirects`. + # See the [httpx documentation](https://www.python-httpx.org/api/#asyncclient) for more details. + http_client: httpx.AsyncClient | None = None, + # Enable or disable schema validation for data returned by the API. + # When enabled an error APIResponseValidationError is raised + # if the API responds with invalid data for the expected schema. + # + # This parameter may be removed or changed in the future. + # If you rely on this feature, please open a GitHub issue + # outlining your use-case to help us decide if it should be + # part of our public interface in the future. + _strict_response_validation: bool = False, + ) -> None: + """Construct a new async openai client instance. + + This automatically infers the following arguments from their corresponding environment variables if they are not provided: + - `api_key` from `OPENAI_API_KEY` + - `organization` from `OPENAI_ORG_ID` + - `project` from `OPENAI_PROJECT_ID` + """ + if api_key is None: + api_key = os.environ.get("OPENAI_API_KEY") + if api_key is None: + raise OpenAIError( + "The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable" + ) + self.api_key = api_key + + if organization is None: + organization = os.environ.get("OPENAI_ORG_ID") + self.organization = organization + + if project is None: + project = os.environ.get("OPENAI_PROJECT_ID") + self.project = project + + if base_url is None: + base_url = os.environ.get("OPENAI_BASE_URL") + if base_url is None: + base_url = f"https://api.openai.com/v1" + + super().__init__( + version=__version__, + base_url=base_url, + max_retries=max_retries, + timeout=timeout, + http_client=http_client, + custom_headers=default_headers, + custom_query=default_query, + _strict_response_validation=_strict_response_validation, + ) + + self._default_stream_cls = AsyncStream + + self.completions = resources.AsyncCompletions(self) + self.chat = resources.AsyncChat(self) + self.embeddings = resources.AsyncEmbeddings(self) + self.files = resources.AsyncFiles(self) + self.images = resources.AsyncImages(self) + self.audio = resources.AsyncAudio(self) + self.moderations = resources.AsyncModerations(self) + self.models = resources.AsyncModels(self) + self.fine_tuning = resources.AsyncFineTuning(self) + self.beta = resources.AsyncBeta(self) + self.batches = resources.AsyncBatches(self) + self.with_raw_response = AsyncOpenAIWithRawResponse(self) + self.with_streaming_response = AsyncOpenAIWithStreamedResponse(self) + + @property + @override + def qs(self) -> Querystring: + return Querystring(array_format="comma") + + @property + @override + def auth_headers(self) -> dict[str, str]: + api_key = self.api_key + return {"Authorization": f"Bearer {api_key}"} + + @property + @override + def default_headers(self) -> dict[str, str | Omit]: + return { + **super().default_headers, + "X-Stainless-Async": f"async:{get_async_library()}", + "OpenAI-Organization": self.organization + if self.organization is not None + else Omit(), + "OpenAI-Project": self.project if self.project is not None else Omit(), + **self._custom_headers, + } + + def copy( + self, + *, + api_key: str | None = None, + organization: str | None = None, + project: str | None = None, + base_url: str | httpx.URL | None = None, + timeout: float | Timeout | None | NotGiven = NOT_GIVEN, + http_client: httpx.AsyncClient | None = None, + 
max_retries: int | NotGiven = NOT_GIVEN, + default_headers: Mapping[str, str] | None = None, + set_default_headers: Mapping[str, str] | None = None, + default_query: Mapping[str, object] | None = None, + set_default_query: Mapping[str, object] | None = None, + _extra_kwargs: Mapping[str, Any] = {}, + ) -> Self: + """ + Create a new client instance re-using the same options given to the current client with optional overriding. + """ + if default_headers is not None and set_default_headers is not None: + raise ValueError( + "The `default_headers` and `set_default_headers` arguments are mutually exclusive" + ) + + if default_query is not None and set_default_query is not None: + raise ValueError( + "The `default_query` and `set_default_query` arguments are mutually exclusive" + ) + + headers = self._custom_headers + if default_headers is not None: + headers = {**headers, **default_headers} + elif set_default_headers is not None: + headers = set_default_headers + + params = self._custom_query + if default_query is not None: + params = {**params, **default_query} + elif set_default_query is not None: + params = set_default_query + + http_client = http_client or self._client + return self.__class__( + api_key=api_key or self.api_key, + organization=organization or self.organization, + project=project or self.project, + base_url=base_url or self.base_url, + timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, + http_client=http_client, + max_retries=max_retries if is_given(max_retries) else self.max_retries, + default_headers=headers, + default_query=params, + **_extra_kwargs, + ) + + # Alias for `copy` for nicer inline usage, e.g. + # client.with_options(timeout=10).foo.create(...) + with_options = copy + + @override + def _make_status_error( + self, + err_msg: str, + *, + body: object, + response: httpx.Response, + ) -> APIStatusError: + data = body.get("error", body) if is_mapping(body) else body + if response.status_code == 400: + return _exceptions.BadRequestError(err_msg, response=response, body=data) + + if response.status_code == 401: + return _exceptions.AuthenticationError( + err_msg, response=response, body=data + ) + + if response.status_code == 403: + return _exceptions.PermissionDeniedError( + err_msg, response=response, body=data + ) + + if response.status_code == 404: + return _exceptions.NotFoundError(err_msg, response=response, body=data) + + if response.status_code == 409: + return _exceptions.ConflictError(err_msg, response=response, body=data) + + if response.status_code == 422: + return _exceptions.UnprocessableEntityError( + err_msg, response=response, body=data + ) + + if response.status_code == 429: + return _exceptions.RateLimitError(err_msg, response=response, body=data) + + if response.status_code >= 500: + return _exceptions.InternalServerError( + err_msg, response=response, body=data + ) + return APIStatusError(err_msg, response=response, body=data) + + +class OpenAIWithRawResponse: + def __init__(self, client: OpenAI) -> None: + self.completions = resources.CompletionsWithRawResponse(client.completions) + self.chat = resources.ChatWithRawResponse(client.chat) + self.embeddings = resources.EmbeddingsWithRawResponse(client.embeddings) + self.files = resources.FilesWithRawResponse(client.files) + self.images = resources.ImagesWithRawResponse(client.images) + self.audio = resources.AudioWithRawResponse(client.audio) + self.moderations = resources.ModerationsWithRawResponse(client.moderations) + self.models = resources.ModelsWithRawResponse(client.models) 
+ self.fine_tuning = resources.FineTuningWithRawResponse(client.fine_tuning) + self.beta = resources.BetaWithRawResponse(client.beta) + self.batches = resources.BatchesWithRawResponse(client.batches) + + +class AsyncOpenAIWithRawResponse: + def __init__(self, client: AsyncOpenAI) -> None: + self.completions = resources.AsyncCompletionsWithRawResponse(client.completions) + self.chat = resources.AsyncChatWithRawResponse(client.chat) + self.embeddings = resources.AsyncEmbeddingsWithRawResponse(client.embeddings) + self.files = resources.AsyncFilesWithRawResponse(client.files) + self.images = resources.AsyncImagesWithRawResponse(client.images) + self.audio = resources.AsyncAudioWithRawResponse(client.audio) + self.moderations = resources.AsyncModerationsWithRawResponse(client.moderations) + self.models = resources.AsyncModelsWithRawResponse(client.models) + self.fine_tuning = resources.AsyncFineTuningWithRawResponse(client.fine_tuning) + self.beta = resources.AsyncBetaWithRawResponse(client.beta) + self.batches = resources.AsyncBatchesWithRawResponse(client.batches) + + +class OpenAIWithStreamedResponse: + def __init__(self, client: OpenAI) -> None: + self.completions = resources.CompletionsWithStreamingResponse( + client.completions + ) + self.chat = resources.ChatWithStreamingResponse(client.chat) + self.embeddings = resources.EmbeddingsWithStreamingResponse(client.embeddings) + self.files = resources.FilesWithStreamingResponse(client.files) + self.images = resources.ImagesWithStreamingResponse(client.images) + self.audio = resources.AudioWithStreamingResponse(client.audio) + self.moderations = resources.ModerationsWithStreamingResponse( + client.moderations + ) + self.models = resources.ModelsWithStreamingResponse(client.models) + self.fine_tuning = resources.FineTuningWithStreamingResponse(client.fine_tuning) + self.beta = resources.BetaWithStreamingResponse(client.beta) + self.batches = resources.BatchesWithStreamingResponse(client.batches) + + +class AsyncOpenAIWithStreamedResponse: + def __init__(self, client: AsyncOpenAI) -> None: + self.completions = resources.AsyncCompletionsWithStreamingResponse( + client.completions + ) + self.chat = resources.AsyncChatWithStreamingResponse(client.chat) + self.embeddings = resources.AsyncEmbeddingsWithStreamingResponse( + client.embeddings + ) + self.files = resources.AsyncFilesWithStreamingResponse(client.files) + self.images = resources.AsyncImagesWithStreamingResponse(client.images) + self.audio = resources.AsyncAudioWithStreamingResponse(client.audio) + self.moderations = resources.AsyncModerationsWithStreamingResponse( + client.moderations + ) + self.models = resources.AsyncModelsWithStreamingResponse(client.models) + self.fine_tuning = resources.AsyncFineTuningWithStreamingResponse( + client.fine_tuning + ) + self.beta = resources.AsyncBetaWithStreamingResponse(client.beta) + self.batches = resources.AsyncBatchesWithStreamingResponse(client.batches) + + +Client = OpenAI + +AsyncClient = AsyncOpenAI diff --git a/portkey_ai/_vendor/openai/_compat.py b/portkey_ai/_vendor/openai/_compat.py new file mode 100644 index 00000000..0c0c6dfc --- /dev/null +++ b/portkey_ai/_vendor/openai/_compat.py @@ -0,0 +1,235 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Union, Generic, TypeVar, Callable, cast, overload +from datetime import date, datetime +from typing_extensions import Self + +import pydantic +from pydantic.fields import FieldInfo + +from ._types import StrBytesIntFloat + +_T = TypeVar("_T") +_ModelT = 
TypeVar("_ModelT", bound=pydantic.BaseModel) + +# --------------- Pydantic v2 compatibility --------------- + +# Pyright incorrectly reports some of our functions as overriding a method when they don't +# pyright: reportIncompatibleMethodOverride=false + +PYDANTIC_V2 = pydantic.VERSION.startswith("2.") + +# v1 re-exports +if TYPE_CHECKING: + + def parse_date(value: date | StrBytesIntFloat) -> date: # noqa: ARG001 + ... + + def parse_datetime( + value: Union[datetime, StrBytesIntFloat] + ) -> datetime: # noqa: ARG001 + ... + + def get_args(t: type[Any]) -> tuple[Any, ...]: # noqa: ARG001 + ... + + def is_union(tp: type[Any] | None) -> bool: # noqa: ARG001 + ... + + def get_origin(t: type[Any]) -> type[Any] | None: # noqa: ARG001 + ... + + def is_literal_type(type_: type[Any]) -> bool: # noqa: ARG001 + ... + + def is_typeddict(type_: type[Any]) -> bool: # noqa: ARG001 + ... + +else: + if PYDANTIC_V2: + from pydantic.v1.typing import ( + get_args as get_args, + is_union as is_union, + get_origin as get_origin, + is_typeddict as is_typeddict, + is_literal_type as is_literal_type, + ) + from pydantic.v1.datetime_parse import ( + parse_date as parse_date, + parse_datetime as parse_datetime, + ) + else: + from pydantic.typing import ( + get_args as get_args, + is_union as is_union, + get_origin as get_origin, + is_typeddict as is_typeddict, + is_literal_type as is_literal_type, + ) + from pydantic.datetime_parse import ( + parse_date as parse_date, + parse_datetime as parse_datetime, + ) + + +# refactored config +if TYPE_CHECKING: + from pydantic import ConfigDict as ConfigDict +else: + if PYDANTIC_V2: + from pydantic import ConfigDict + else: + # TODO: provide an error message here? + ConfigDict = None + + +# renamed methods / properties +def parse_obj(model: type[_ModelT], value: object) -> _ModelT: + if PYDANTIC_V2: + return model.model_validate(value) + else: + return cast( + _ModelT, model.parse_obj(value) + ) # pyright: ignore[reportDeprecated, reportUnnecessaryCast] + + +def field_is_required(field: FieldInfo) -> bool: + if PYDANTIC_V2: + return field.is_required() + return field.required # type: ignore + + +def field_get_default(field: FieldInfo) -> Any: + value = field.get_default() + if PYDANTIC_V2: + from pydantic_core import PydanticUndefined + + if value == PydanticUndefined: + return None + return value + return value + + +def field_outer_type(field: FieldInfo) -> Any: + if PYDANTIC_V2: + return field.annotation + return field.outer_type_ # type: ignore + + +def get_model_config(model: type[pydantic.BaseModel]) -> Any: + if PYDANTIC_V2: + return model.model_config + return model.__config__ # type: ignore + + +def get_model_fields(model: type[pydantic.BaseModel]) -> dict[str, FieldInfo]: + if PYDANTIC_V2: + return model.model_fields + return model.__fields__ # type: ignore + + +def model_copy(model: _ModelT) -> _ModelT: + if PYDANTIC_V2: + return model.model_copy() + return model.copy() # type: ignore + + +def model_json(model: pydantic.BaseModel, *, indent: int | None = None) -> str: + if PYDANTIC_V2: + return model.model_dump_json(indent=indent) + return model.json(indent=indent) # type: ignore + + +def model_dump( + model: pydantic.BaseModel, + *, + exclude_unset: bool = False, + exclude_defaults: bool = False, +) -> dict[str, Any]: + if PYDANTIC_V2: + return model.model_dump( + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + ) + return cast( + "dict[str, Any]", + model.dict( # pyright: ignore[reportDeprecated, reportUnnecessaryCast] + 
exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + ), + ) + + +def model_parse(model: type[_ModelT], data: Any) -> _ModelT: + if PYDANTIC_V2: + return model.model_validate(data) + return model.parse_obj(data) # pyright: ignore[reportDeprecated] + + +# generic models +if TYPE_CHECKING: + + class GenericModel(pydantic.BaseModel): + ... + +else: + if PYDANTIC_V2: + # there no longer needs to be a distinction in v2 but + # we still have to create our own subclass to avoid + # inconsistent MRO ordering errors + class GenericModel(pydantic.BaseModel): + ... + + else: + import pydantic.generics + + class GenericModel(pydantic.generics.GenericModel, pydantic.BaseModel): + ... + + +# cached properties +if TYPE_CHECKING: + cached_property = property + + # we define a separate type (copied from typeshed) + # that represents that `cached_property` is `set`able + # at runtime, which differs from `@property`. + # + # this is a separate type as editors likely special case + # `@property` and we don't want to cause issues just to have + # more helpful internal types. + + class typed_cached_property(Generic[_T]): + func: Callable[[Any], _T] + attrname: str | None + + def __init__(self, func: Callable[[Any], _T]) -> None: + ... + + @overload + def __get__(self, instance: None, owner: type[Any] | None = None) -> Self: + ... + + @overload + def __get__(self, instance: object, owner: type[Any] | None = None) -> _T: + ... + + def __get__( + self, instance: object, owner: type[Any] | None = None + ) -> _T | Self: + raise NotImplementedError() + + def __set_name__(self, owner: type[Any], name: str) -> None: + ... + + # __set__ is not defined at runtime, but @cached_property is designed to be settable + def __set__(self, instance: object, value: _T) -> None: + ... + +else: + try: + from functools import cached_property as cached_property + except ImportError: + from cached_property import cached_property as cached_property + + typed_cached_property = cached_property diff --git a/portkey_ai/_vendor/openai/_constants.py b/portkey_ai/_vendor/openai/_constants.py new file mode 100644 index 00000000..3075407c --- /dev/null +++ b/portkey_ai/_vendor/openai/_constants.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +import httpx + +RAW_RESPONSE_HEADER = "X-Stainless-Raw-Response" +OVERRIDE_CAST_TO_HEADER = "____stainless_override_cast_to" + +# default timeout is 10 minutes +DEFAULT_TIMEOUT = httpx.Timeout(timeout=600.0, connect=5.0) +DEFAULT_MAX_RETRIES = 2 +DEFAULT_CONNECTION_LIMITS = httpx.Limits( + max_connections=1000, max_keepalive_connections=100 +) + +INITIAL_RETRY_DELAY = 0.5 +MAX_RETRY_DELAY = 8.0 diff --git a/portkey_ai/_vendor/openai/_exceptions.py b/portkey_ai/_vendor/openai/_exceptions.py new file mode 100644 index 00000000..e5ef1177 --- /dev/null +++ b/portkey_ai/_vendor/openai/_exceptions.py @@ -0,0 +1,161 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Any, Optional, cast +from typing_extensions import Literal + +import httpx + +from ._utils import is_dict +from ._models import construct_type + +__all__ = [ + "BadRequestError", + "AuthenticationError", + "PermissionDeniedError", + "NotFoundError", + "ConflictError", + "UnprocessableEntityError", + "RateLimitError", + "InternalServerError", +] + + +class OpenAIError(Exception): + pass + + +class APIError(OpenAIError): + message: str + request: httpx.Request + + body: object | None + """The API response body. + + If the API responded with a valid JSON structure then this property will be the + decoded result. + + If it isn't a valid JSON structure then this will be the raw response. + + If there was no response associated with this error then it will be `None`. + """ + + code: Optional[str] = None + param: Optional[str] = None + type: Optional[str] + + def __init__( + self, message: str, request: httpx.Request, *, body: object | None + ) -> None: + super().__init__(message) + self.request = request + self.message = message + self.body = body + + if is_dict(body): + self.code = cast( + Any, construct_type(type_=Optional[str], value=body.get("code")) + ) + self.param = cast( + Any, construct_type(type_=Optional[str], value=body.get("param")) + ) + self.type = cast(Any, construct_type(type_=str, value=body.get("type"))) + else: + self.code = None + self.param = None + self.type = None + + +class APIResponseValidationError(APIError): + response: httpx.Response + status_code: int + + def __init__( + self, + response: httpx.Response, + body: object | None, + *, + message: str | None = None, + ) -> None: + super().__init__( + message or "Data returned by API invalid for expected schema.", + response.request, + body=body, + ) + self.response = response + self.status_code = response.status_code + + +class APIStatusError(APIError): + """Raised when an API response has a status code of 4xx or 5xx.""" + + response: httpx.Response + status_code: int + request_id: str | None + + def __init__( + self, message: str, *, response: httpx.Response, body: object | None + ) -> None: + super().__init__(message, response.request, body=body) + self.response = response + self.status_code = response.status_code + self.request_id = response.headers.get("x-request-id") + + +class APIConnectionError(APIError): + def __init__( + self, *, message: str = "Connection error.", request: httpx.Request + ) -> None: + super().__init__(message, request, body=None) + + +class APITimeoutError(APIConnectionError): + def __init__(self, request: httpx.Request) -> None: + super().__init__(message="Request timed out.", request=request) + + +class BadRequestError(APIStatusError): + status_code: Literal[ + 400 + ] = 400 # pyright: ignore[reportIncompatibleVariableOverride] + + +class AuthenticationError(APIStatusError): + status_code: Literal[ + 401 + ] = 401 # pyright: ignore[reportIncompatibleVariableOverride] + + +class PermissionDeniedError(APIStatusError): + status_code: Literal[ + 403 + ] = 403 # pyright: ignore[reportIncompatibleVariableOverride] + + +class NotFoundError(APIStatusError): + status_code: Literal[ + 404 + ] = 404 # pyright: ignore[reportIncompatibleVariableOverride] + + +class ConflictError(APIStatusError): + status_code: Literal[ + 409 + ] = 409 # pyright: ignore[reportIncompatibleVariableOverride] + + +class UnprocessableEntityError(APIStatusError): + status_code: Literal[ + 422 + ] = 422 # pyright: ignore[reportIncompatibleVariableOverride] + + +class 
RateLimitError(APIStatusError): + status_code: Literal[ + 429 + ] = 429 # pyright: ignore[reportIncompatibleVariableOverride] + + +class InternalServerError(APIStatusError): + pass diff --git a/portkey_ai/_vendor/openai/_extras/__init__.py b/portkey_ai/_vendor/openai/_extras/__init__.py new file mode 100644 index 00000000..864dac41 --- /dev/null +++ b/portkey_ai/_vendor/openai/_extras/__init__.py @@ -0,0 +1,2 @@ +from .numpy_proxy import numpy as numpy, has_numpy as has_numpy +from .pandas_proxy import pandas as pandas diff --git a/portkey_ai/_vendor/openai/_extras/_common.py b/portkey_ai/_vendor/openai/_extras/_common.py new file mode 100644 index 00000000..6e71720e --- /dev/null +++ b/portkey_ai/_vendor/openai/_extras/_common.py @@ -0,0 +1,21 @@ +from .._exceptions import OpenAIError + +INSTRUCTIONS = """ + +OpenAI error: + + missing `{library}` + +This feature requires additional dependencies: + + $ pip install openai[{extra}] + +""" + + +def format_instructions(*, library: str, extra: str) -> str: + return INSTRUCTIONS.format(library=library, extra=extra) + + +class MissingDependencyError(OpenAIError): + pass diff --git a/portkey_ai/_vendor/openai/_extras/numpy_proxy.py b/portkey_ai/_vendor/openai/_extras/numpy_proxy.py new file mode 100644 index 00000000..27880bf1 --- /dev/null +++ b/portkey_ai/_vendor/openai/_extras/numpy_proxy.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any +from typing_extensions import override + +from .._utils import LazyProxy +from ._common import MissingDependencyError, format_instructions + +if TYPE_CHECKING: + import numpy as numpy + + +NUMPY_INSTRUCTIONS = format_instructions(library="numpy", extra="datalib") + + +class NumpyProxy(LazyProxy[Any]): + @override + def __load__(self) -> Any: + try: + import numpy + except ImportError as err: + raise MissingDependencyError(NUMPY_INSTRUCTIONS) from err + + return numpy + + +if not TYPE_CHECKING: + numpy = NumpyProxy() + + +def has_numpy() -> bool: + try: + import numpy # noqa: F401 # pyright: ignore[reportUnusedImport] + except ImportError: + return False + + return True diff --git a/portkey_ai/_vendor/openai/_extras/pandas_proxy.py b/portkey_ai/_vendor/openai/_extras/pandas_proxy.py new file mode 100644 index 00000000..686377ba --- /dev/null +++ b/portkey_ai/_vendor/openai/_extras/pandas_proxy.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any +from typing_extensions import override + +from .._utils import LazyProxy +from ._common import MissingDependencyError, format_instructions + +if TYPE_CHECKING: + import pandas as pandas + + +PANDAS_INSTRUCTIONS = format_instructions(library="pandas", extra="datalib") + + +class PandasProxy(LazyProxy[Any]): + @override + def __load__(self) -> Any: + try: + import pandas + except ImportError as err: + raise MissingDependencyError(PANDAS_INSTRUCTIONS) from err + + return pandas + + +if not TYPE_CHECKING: + pandas = PandasProxy() diff --git a/portkey_ai/_vendor/openai/_files.py b/portkey_ai/_vendor/openai/_files.py new file mode 100644 index 00000000..caad9db8 --- /dev/null +++ b/portkey_ai/_vendor/openai/_files.py @@ -0,0 +1,142 @@ +from __future__ import annotations + +import io +import os +import pathlib +from typing import overload +from typing_extensions import TypeGuard + +import anyio + +from ._types import ( + FileTypes, + FileContent, + RequestFiles, + HttpxFileTypes, + Base64FileInput, + HttpxFileContent, + HttpxRequestFiles, +) +from ._utils import is_tuple_t, 
is_mapping_t, is_sequence_t + + +def is_base64_file_input(obj: object) -> TypeGuard[Base64FileInput]: + return isinstance(obj, io.IOBase) or isinstance(obj, os.PathLike) + + +def is_file_content(obj: object) -> TypeGuard[FileContent]: + return ( + isinstance(obj, bytes) + or isinstance(obj, tuple) + or isinstance(obj, io.IOBase) + or isinstance(obj, os.PathLike) + ) + + +def assert_is_file_content(obj: object, *, key: str | None = None) -> None: + if not is_file_content(obj): + prefix = ( + f"Expected entry at `{key}`" + if key is not None + else f"Expected file input `{obj!r}`" + ) + raise RuntimeError( + f"{prefix} to be bytes, an io.IOBase instance, PathLike or a tuple but received {type(obj)} instead. See https://github.com/openai/openai-python/tree/main#file-uploads" + ) from None + + +@overload +def to_httpx_files(files: None) -> None: + ... + + +@overload +def to_httpx_files(files: RequestFiles) -> HttpxRequestFiles: + ... + + +def to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles | None: + if files is None: + return None + + if is_mapping_t(files): + files = {key: _transform_file(file) for key, file in files.items()} + elif is_sequence_t(files): + files = [(key, _transform_file(file)) for key, file in files] + else: + raise TypeError( + f"Unexpected file type input {type(files)}, expected mapping or sequence" + ) + + return files + + +def _transform_file(file: FileTypes) -> HttpxFileTypes: + if is_file_content(file): + if isinstance(file, os.PathLike): + path = pathlib.Path(file) + return (path.name, path.read_bytes()) + + return file + + if is_tuple_t(file): + return (file[0], _read_file_content(file[1]), *file[2:]) + + raise TypeError( + f"Expected file types input to be a FileContent type or to be a tuple" + ) + + +def _read_file_content(file: FileContent) -> HttpxFileContent: + if isinstance(file, os.PathLike): + return pathlib.Path(file).read_bytes() + return file + + +@overload +async def async_to_httpx_files(files: None) -> None: + ... + + +@overload +async def async_to_httpx_files(files: RequestFiles) -> HttpxRequestFiles: + ... 
+ + +async def async_to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles | None: + if files is None: + return None + + if is_mapping_t(files): + files = {key: await _async_transform_file(file) for key, file in files.items()} + elif is_sequence_t(files): + files = [(key, await _async_transform_file(file)) for key, file in files] + else: + raise TypeError( + "Unexpected file type input {type(files)}, expected mapping or sequence" + ) + + return files + + +async def _async_transform_file(file: FileTypes) -> HttpxFileTypes: + if is_file_content(file): + if isinstance(file, os.PathLike): + path = anyio.Path(file) + return (path.name, await path.read_bytes()) + + return file + + if is_tuple_t(file): + return (file[0], await _async_read_file_content(file[1]), *file[2:]) + + raise TypeError( + f"Expected file types input to be a FileContent type or to be a tuple" + ) + + +async def _async_read_file_content(file: FileContent) -> HttpxFileContent: + if isinstance(file, os.PathLike): + return await anyio.Path(file).read_bytes() + + return file diff --git a/portkey_ai/_vendor/openai/_legacy_response.py b/portkey_ai/_vendor/openai/_legacy_response.py new file mode 100644 index 00000000..d69a14fb --- /dev/null +++ b/portkey_ai/_vendor/openai/_legacy_response.py @@ -0,0 +1,499 @@ +from __future__ import annotations + +import os +import inspect +import logging +import datetime +import functools +from typing import ( + TYPE_CHECKING, + Any, + Union, + Generic, + TypeVar, + Callable, + Iterator, + AsyncIterator, + cast, + overload, +) +from typing_extensions import Awaitable, ParamSpec, override, deprecated, get_origin + +import anyio +import httpx +import pydantic + +from ._types import NoneType +from ._utils import is_given, extract_type_arg, is_annotated_type +from ._models import BaseModel, is_basemodel +from ._constants import RAW_RESPONSE_HEADER +from ._streaming import ( + Stream, + AsyncStream, + is_stream_class_type, + extract_stream_chunk_type, +) +from ._exceptions import APIResponseValidationError + +if TYPE_CHECKING: + from ._models import FinalRequestOptions + from ._base_client import BaseClient + + +P = ParamSpec("P") +R = TypeVar("R") +_T = TypeVar("_T") + +log: logging.Logger = logging.getLogger(__name__) + + +class LegacyAPIResponse(Generic[R]): + """This is a legacy class as it will be replaced by `APIResponse` + and `AsyncAPIResponse` in the `_response.py` file in the next major + release. + + For the sync client this will mostly be the same with the exception + of `content` & `text` will be methods instead of properties. In the + async client, all methods will be async. + + A migration script will be provided & the migration in general should + be smooth. 
+ """ + + _cast_to: type[R] + _client: BaseClient[Any, Any] + _parsed_by_type: dict[type[Any], Any] + _stream: bool + _stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None + _options: FinalRequestOptions + + http_response: httpx.Response + + def __init__( + self, + *, + raw: httpx.Response, + cast_to: type[R], + client: BaseClient[Any, Any], + stream: bool, + stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None, + options: FinalRequestOptions, + ) -> None: + self._cast_to = cast_to + self._client = client + self._parsed_by_type = {} + self._stream = stream + self._stream_cls = stream_cls + self._options = options + self.http_response = raw + + @property + def request_id(self) -> str | None: + return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return] + + @overload + def parse(self, *, to: type[_T]) -> _T: + ... + + @overload + def parse(self) -> R: + ... + + def parse(self, *, to: type[_T] | None = None) -> R | _T: + """Returns the rich python representation of this response's data. + + NOTE: For the async client: this will become a coroutine in the next major version. + + For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`. + + You can customise the type that the response is parsed into through + the `to` argument, e.g. + + ```py + from openai import BaseModel + + + class MyModel(BaseModel): + foo: str + + + obj = response.parse(to=MyModel) + print(obj.foo) + ``` + + We support parsing: + - `BaseModel` + - `dict` + - `list` + - `Union` + - `str` + - `int` + - `float` + - `httpx.Response` + """ + cache_key = to if to is not None else self._cast_to + cached = self._parsed_by_type.get(cache_key) + if cached is not None: + return cached # type: ignore[no-any-return] + + parsed = self._parse(to=to) + if is_given(self._options.post_parser): + parsed = self._options.post_parser(parsed) + + self._parsed_by_type[cache_key] = parsed + return parsed + + @property + def headers(self) -> httpx.Headers: + return self.http_response.headers + + @property + def http_request(self) -> httpx.Request: + return self.http_response.request + + @property + def status_code(self) -> int: + return self.http_response.status_code + + @property + def url(self) -> httpx.URL: + return self.http_response.url + + @property + def method(self) -> str: + return self.http_request.method + + @property + def content(self) -> bytes: + """Return the binary response content. + + NOTE: this will be removed in favour of `.read()` in the + next major version. + """ + return self.http_response.content + + @property + def text(self) -> str: + """Return the decoded response content. + + NOTE: this will be turned into a method in the next major version. 
+ """ + return self.http_response.text + + @property + def http_version(self) -> str: + return self.http_response.http_version + + @property + def is_closed(self) -> bool: + return self.http_response.is_closed + + @property + def elapsed(self) -> datetime.timedelta: + """The time taken for the complete request/response cycle to complete.""" + return self.http_response.elapsed + + def _parse(self, *, to: type[_T] | None = None) -> R | _T: + # unwrap `Annotated[T, ...]` -> `T` + if to and is_annotated_type(to): + to = extract_type_arg(to, 0) + + if self._stream: + if to: + if not is_stream_class_type(to): + raise TypeError( + f"Expected custom parse type to be a subclass of {Stream} or {AsyncStream}" + ) + + return cast( + _T, + to( + cast_to=extract_stream_chunk_type( + to, + failure_message="Expected custom stream type to be passed with a type argument, e.g. Stream[ChunkType]", + ), + response=self.http_response, + client=cast(Any, self._client), + ), + ) + + if self._stream_cls: + return cast( + R, + self._stream_cls( + cast_to=extract_stream_chunk_type(self._stream_cls), + response=self.http_response, + client=cast(Any, self._client), + ), + ) + + stream_cls = cast( + "type[Stream[Any]] | type[AsyncStream[Any]] | None", + self._client._default_stream_cls, + ) + if stream_cls is None: + raise MissingStreamClassError() + + return cast( + R, + stream_cls( + cast_to=self._cast_to, + response=self.http_response, + client=cast(Any, self._client), + ), + ) + + cast_to = to if to is not None else self._cast_to + + # unwrap `Annotated[T, ...]` -> `T` + if is_annotated_type(cast_to): + cast_to = extract_type_arg(cast_to, 0) + + if cast_to is NoneType: + return cast(R, None) + + response = self.http_response + if cast_to == str: + return cast(R, response.text) + + if cast_to == int: + return cast(R, int(response.text)) + + if cast_to == float: + return cast(R, float(response.text)) + + origin = get_origin(cast_to) or cast_to + + if inspect.isclass(origin) and issubclass(origin, HttpxBinaryResponseContent): + return cast(R, cast_to(response)) # type: ignore + + if origin == LegacyAPIResponse: + raise RuntimeError("Unexpected state - cast_to is `APIResponse`") + + if inspect.isclass(origin) and issubclass(origin, httpx.Response): + # Because of the invariance of our ResponseT TypeVar, users can subclass httpx.Response + # and pass that class to our request functions. We cannot change the variance to be either + # covariant or contravariant as that makes our usage of ResponseT illegal. We could construct + # the response class ourselves but that is something that should be supported directly in httpx + # as it would be easy to incorrectly construct the Response object due to the multitude of arguments. + if cast_to != httpx.Response: + raise ValueError( + f"Subclasses of httpx.Response cannot be passed to `cast_to`" + ) + return cast(R, response) + + if ( + inspect.isclass(origin) + and not issubclass(origin, BaseModel) + and issubclass(origin, pydantic.BaseModel) + ): + raise TypeError( + "Pydantic models must subclass our base model type, e.g. `from openai import BaseModel`" + ) + + if ( + cast_to is not object + and not origin is list + and not origin is dict + and not origin is Union + and not issubclass(origin, BaseModel) + ): + raise RuntimeError( + f"Unsupported type, expected {cast_to} to be a subclass of {BaseModel}, {dict}, {list}, {Union}, {NoneType}, {str} or {httpx.Response}." + ) + + # split is required to handle cases where additional information is included + # in the response, e.g. 
application/json; charset=utf-8 + content_type, *_ = response.headers.get("content-type", "*").split(";") + if content_type != "application/json": + if is_basemodel(cast_to): + try: + data = response.json() + except Exception as exc: + log.debug( + "Could not read JSON from response data due to %s - %s", + type(exc), + exc, + ) + else: + return self._client._process_response_data( + data=data, + cast_to=cast_to, # type: ignore + response=response, + ) + + if self._client._strict_response_validation: + raise APIResponseValidationError( + response=response, + message=f"Expected Content-Type response header to be `application/json` but received `{content_type}` instead.", + body=response.text, + ) + + # If the API responds with content that isn't JSON then we just return + # the (decoded) text without performing any parsing so that you can still + # handle the response however you need to. + return response.text # type: ignore + + data = response.json() + + return self._client._process_response_data( + data=data, + cast_to=cast_to, # type: ignore + response=response, + ) + + @override + def __repr__(self) -> str: + return f"<APIResponse [{self.status_code} {self.http_response.reason_phrase}] type={self._cast_to}>" + + +class MissingStreamClassError(TypeError): + def __init__(self) -> None: + super().__init__( + "The `stream` argument was set to `True` but the `stream_cls` argument was not given. See `openai._streaming` for reference", + ) + + +def to_raw_response_wrapper(func: Callable[P, R]) -> Callable[P, LegacyAPIResponse[R]]: + """Higher order function that takes one of our bound API methods and wraps it + to support returning the raw `APIResponse` object directly. + """ + + @functools.wraps(func) + def wrapped(*args: P.args, **kwargs: P.kwargs) -> LegacyAPIResponse[R]: + extra_headers: dict[str, str] = { + **(cast(Any, kwargs.get("extra_headers")) or {}) + } + extra_headers[RAW_RESPONSE_HEADER] = "true" + + kwargs["extra_headers"] = extra_headers + + return cast(LegacyAPIResponse[R], func(*args, **kwargs)) + + return wrapped + + +def async_to_raw_response_wrapper( + func: Callable[P, Awaitable[R]] +) -> Callable[P, Awaitable[LegacyAPIResponse[R]]]: + """Higher order function that takes one of our bound API methods and wraps it + to support returning the raw `APIResponse` object directly.
+ """ + + @functools.wraps(func) + async def wrapped(*args: P.args, **kwargs: P.kwargs) -> LegacyAPIResponse[R]: + extra_headers: dict[str, str] = { + **(cast(Any, kwargs.get("extra_headers")) or {}) + } + extra_headers[RAW_RESPONSE_HEADER] = "true" + + kwargs["extra_headers"] = extra_headers + + return cast(LegacyAPIResponse[R], await func(*args, **kwargs)) + + return wrapped + + +class HttpxBinaryResponseContent: + response: httpx.Response + + def __init__(self, response: httpx.Response) -> None: + self.response = response + + @property + def content(self) -> bytes: + return self.response.content + + @property + def text(self) -> str: + return self.response.text + + @property + def encoding(self) -> str | None: + return self.response.encoding + + @property + def charset_encoding(self) -> str | None: + return self.response.charset_encoding + + def json(self, **kwargs: Any) -> Any: + return self.response.json(**kwargs) + + def read(self) -> bytes: + return self.response.read() + + def iter_bytes(self, chunk_size: int | None = None) -> Iterator[bytes]: + return self.response.iter_bytes(chunk_size) + + def iter_text(self, chunk_size: int | None = None) -> Iterator[str]: + return self.response.iter_text(chunk_size) + + def iter_lines(self) -> Iterator[str]: + return self.response.iter_lines() + + def iter_raw(self, chunk_size: int | None = None) -> Iterator[bytes]: + return self.response.iter_raw(chunk_size) + + def write_to_file( + self, + file: str | os.PathLike[str], + ) -> None: + """Write the output to the given file. + + Accepts a filename or any path-like object, e.g. pathlib.Path + + Note: if you want to stream the data to the file instead of writing + all at once then you should use `.with_streaming_response` when making + the API request, e.g. 
`client.with_streaming_response.foo().stream_to_file('my_filename.txt')` + """ + with open(file, mode="wb") as f: + for data in self.response.iter_bytes(): + f.write(data) + + @deprecated( + "Due to a bug, this method doesn't actually stream the response content, `.with_streaming_response.method()` should be used instead" + ) + def stream_to_file( + self, + file: str | os.PathLike[str], + *, + chunk_size: int | None = None, + ) -> None: + with open(file, mode="wb") as f: + for data in self.response.iter_bytes(chunk_size): + f.write(data) + + def close(self) -> None: + return self.response.close() + + async def aread(self) -> bytes: + return await self.response.aread() + + async def aiter_bytes(self, chunk_size: int | None = None) -> AsyncIterator[bytes]: + return self.response.aiter_bytes(chunk_size) + + async def aiter_text(self, chunk_size: int | None = None) -> AsyncIterator[str]: + return self.response.aiter_text(chunk_size) + + async def aiter_lines(self) -> AsyncIterator[str]: + return self.response.aiter_lines() + + async def aiter_raw(self, chunk_size: int | None = None) -> AsyncIterator[bytes]: + return self.response.aiter_raw(chunk_size) + + @deprecated( + "Due to a bug, this method doesn't actually stream the response content, `.with_streaming_response.method()` should be used instead" + ) + async def astream_to_file( + self, + file: str | os.PathLike[str], + *, + chunk_size: int | None = None, + ) -> None: + path = anyio.Path(file) + async with await path.open(mode="wb") as f: + async for data in self.response.aiter_bytes(chunk_size): + await f.write(data) + + async def aclose(self) -> None: + return await self.response.aclose() diff --git a/portkey_ai/_vendor/openai/_models.py b/portkey_ai/_vendor/openai/_models.py new file mode 100644 index 00000000..18bd33b8 --- /dev/null +++ b/portkey_ai/_vendor/openai/_models.py @@ -0,0 +1,755 @@ +from __future__ import annotations + +import os +import inspect +from typing import TYPE_CHECKING, Any, Type, Union, Generic, TypeVar, Callable, cast +from datetime import date, datetime +from typing_extensions import ( + Unpack, + Literal, + ClassVar, + Protocol, + Required, + TypedDict, + TypeGuard, + final, + override, + runtime_checkable, +) + +import pydantic +import pydantic.generics +from pydantic.fields import FieldInfo + +from ._types import ( + Body, + IncEx, + Query, + ModelT, + Headers, + Timeout, + NotGiven, + AnyMapping, + HttpxRequestFiles, +) +from ._utils import ( + PropertyInfo, + is_list, + is_given, + lru_cache, + is_mapping, + parse_date, + coerce_boolean, + parse_datetime, + strip_not_given, + extract_type_arg, + is_annotated_type, + strip_annotated_type, +) +from ._compat import ( + PYDANTIC_V2, + ConfigDict, + GenericModel as BaseGenericModel, + get_args, + is_union, + parse_obj, + get_origin, + is_literal_type, + get_model_config, + get_model_fields, + field_get_default, +) +from ._constants import RAW_RESPONSE_HEADER + +if TYPE_CHECKING: + from pydantic_core.core_schema import ModelField, ModelFieldsSchema + +__all__ = ["BaseModel", "GenericModel"] + +_T = TypeVar("_T") + + +@runtime_checkable +class _ConfigProtocol(Protocol): + allow_population_by_field_name: bool + + +class BaseModel(pydantic.BaseModel): + if PYDANTIC_V2: + model_config: ClassVar[ConfigDict] = ConfigDict( + extra="allow", + defer_build=coerce_boolean(os.environ.get("DEFER_PYDANTIC_BUILD", "true")), + ) + else: + + @property + @override + def model_fields_set(self) -> set[str]: + # a forwards-compat shim for pydantic v2 + return self.__fields_set__ # 
type: ignore + + class Config(pydantic.BaseConfig): # pyright: ignore[reportDeprecated] + extra: Any = pydantic.Extra.allow # type: ignore + + def to_dict( + self, + *, + mode: Literal["json", "python"] = "python", + use_api_names: bool = True, + exclude_unset: bool = True, + exclude_defaults: bool = False, + exclude_none: bool = False, + warnings: bool = True, + ) -> dict[str, object]: + """Recursively generate a dictionary representation of the model, optionally specifying which fields to include or exclude. + + By default, fields that were not set by the API will not be included, + and keys will match the API response, *not* the property names from the model. + + For example, if the API responds with `"fooBar": true` but we've defined a `foo_bar: bool` property, + the output will use the `"fooBar"` key (unless `use_api_names=False` is passed). + + Args: + mode: + If mode is 'json', the dictionary will only contain JSON serializable types. e.g. `datetime` will be turned into a string, `"2024-3-22T18:11:19.117000Z"`. + If mode is 'python', the dictionary may contain any Python objects. e.g. `datetime(2024, 3, 22)` + + use_api_names: Whether to use the key that the API responded with or the property name. Defaults to `True`. + exclude_unset: Whether to exclude fields that have not been explicitly set. + exclude_defaults: Whether to exclude fields that are set to their default value from the output. + exclude_none: Whether to exclude fields that have a value of `None` from the output. + warnings: Whether to log warnings when invalid fields are encountered. This is only supported in Pydantic v2. + """ + return self.model_dump( + mode=mode, + by_alias=use_api_names, + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + exclude_none=exclude_none, + warnings=warnings, + ) + + def to_json( + self, + *, + indent: int | None = 2, + use_api_names: bool = True, + exclude_unset: bool = True, + exclude_defaults: bool = False, + exclude_none: bool = False, + warnings: bool = True, + ) -> str: + """Generates a JSON string representing this model as it would be received from or sent to the API (but with indentation). + + By default, fields that were not set by the API will not be included, + and keys will match the API response, *not* the property names from the model. + + For example, if the API responds with `"fooBar": true` but we've defined a `foo_bar: bool` property, + the output will use the `"fooBar"` key (unless `use_api_names=False` is passed). + + Args: + indent: Indentation to use in the JSON output. If `None` is passed, the output will be compact. Defaults to `2` + use_api_names: Whether to use the key that the API responded with or the property name. Defaults to `True`. + exclude_unset: Whether to exclude fields that have not been explicitly set. + exclude_defaults: Whether to exclude fields that have the default value. + exclude_none: Whether to exclude fields that have a value of `None`. + warnings: Whether to show any warnings that occurred during serialization. This is only supported in Pydantic v2. + """ + return self.model_dump_json( + indent=indent, + by_alias=use_api_names, + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + exclude_none=exclude_none, + warnings=warnings, + ) + + @override + def __str__(self) -> str: + # mypy complains about an invalid self arg + return f'{self.__repr_name__()}({self.__repr_str__(", ")})' # type: ignore[misc] + + # Override the 'construct' method in a way that supports recursive parsing without validation. 
+ # Based on https://github.com/samuelcolvin/pydantic/issues/1168#issuecomment-817742836. + @classmethod + @override + def construct( + cls: Type[ModelT], + _fields_set: set[str] | None = None, + **values: object, + ) -> ModelT: + m = cls.__new__(cls) + fields_values: dict[str, object] = {} + + config = get_model_config(cls) + populate_by_name = ( + config.allow_population_by_field_name + if isinstance(config, _ConfigProtocol) + else config.get("populate_by_name") + ) + + if _fields_set is None: + _fields_set = set() + + model_fields = get_model_fields(cls) + for name, field in model_fields.items(): + key = field.alias + if key is None or (key not in values and populate_by_name): + key = name + + if key in values: + fields_values[name] = _construct_field( + value=values[key], field=field, key=key + ) + _fields_set.add(name) + else: + fields_values[name] = field_get_default(field) + + _extra = {} + for key, value in values.items(): + if key not in model_fields: + if PYDANTIC_V2: + _extra[key] = value + else: + _fields_set.add(key) + fields_values[key] = value + + object.__setattr__(m, "__dict__", fields_values) + + if PYDANTIC_V2: + # these properties are copied from Pydantic's `model_construct()` method + object.__setattr__(m, "__pydantic_private__", None) + object.__setattr__(m, "__pydantic_extra__", _extra) + object.__setattr__(m, "__pydantic_fields_set__", _fields_set) + else: + # init_private_attributes() does not exist in v2 + m._init_private_attributes() # type: ignore + + # copied from Pydantic v1's `construct()` method + object.__setattr__(m, "__fields_set__", _fields_set) + + return m + + if not TYPE_CHECKING: + # type checkers incorrectly complain about this assignment + # because the type signatures are technically different + # although not in practice + model_construct = construct + + if not PYDANTIC_V2: + # we define aliases for some of the new pydantic v2 methods so + # that we can just document these methods without having to specify + # a specific pydantic version as some users may not know which + # pydantic version they are currently using + + @override + def model_dump( + self, + *, + mode: Literal["json", "python"] | str = "python", + include: IncEx = None, + exclude: IncEx = None, + by_alias: bool = False, + exclude_unset: bool = False, + exclude_defaults: bool = False, + exclude_none: bool = False, + round_trip: bool = False, + warnings: bool = True, + ) -> dict[str, Any]: + """Usage docs: https://docs.pydantic.dev/2.4/concepts/serialization/#modelmodel_dump + + Generate a dictionary representation of the model, optionally specifying which fields to include or exclude. + + Args: + mode: The mode in which `to_python` should run. + If mode is 'json', the dictionary will only contain JSON serializable types. + If mode is 'python', the dictionary may contain any Python objects. + include: A list of fields to include in the output. + exclude: A list of fields to exclude from the output. + by_alias: Whether to use the field's alias in the dictionary key if defined. + exclude_unset: Whether to exclude fields that are unset or None from the output. + exclude_defaults: Whether to exclude fields that are set to their default value from the output. + exclude_none: Whether to exclude fields that have a value of `None` from the output. + round_trip: Whether to enable serialization and deserialization round-trip support. + warnings: Whether to log warnings when invalid fields are encountered. + + Returns: + A dictionary representation of the model. 
+ """ + if mode != "python": + raise ValueError("mode is only supported in Pydantic v2") + if round_trip != False: + raise ValueError("round_trip is only supported in Pydantic v2") + if warnings != True: + raise ValueError("warnings is only supported in Pydantic v2") + return super().dict( # pyright: ignore[reportDeprecated] + include=include, + exclude=exclude, + by_alias=by_alias, + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + exclude_none=exclude_none, + ) + + @override + def model_dump_json( + self, + *, + indent: int | None = None, + include: IncEx = None, + exclude: IncEx = None, + by_alias: bool = False, + exclude_unset: bool = False, + exclude_defaults: bool = False, + exclude_none: bool = False, + round_trip: bool = False, + warnings: bool = True, + ) -> str: + """Usage docs: https://docs.pydantic.dev/2.4/concepts/serialization/#modelmodel_dump_json + + Generates a JSON representation of the model using Pydantic's `to_json` method. + + Args: + indent: Indentation to use in the JSON output. If None is passed, the output will be compact. + include: Field(s) to include in the JSON output. Can take either a string or set of strings. + exclude: Field(s) to exclude from the JSON output. Can take either a string or set of strings. + by_alias: Whether to serialize using field aliases. + exclude_unset: Whether to exclude fields that have not been explicitly set. + exclude_defaults: Whether to exclude fields that have the default value. + exclude_none: Whether to exclude fields that have a value of `None`. + round_trip: Whether to use serialization/deserialization between JSON and class instance. + warnings: Whether to show any warnings that occurred during serialization. + + Returns: + A JSON string representation of the model. + """ + if round_trip != False: + raise ValueError("round_trip is only supported in Pydantic v2") + if warnings != True: + raise ValueError("warnings is only supported in Pydantic v2") + return super().json( # type: ignore[reportDeprecated] + indent=indent, + include=include, + exclude=exclude, + by_alias=by_alias, + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + exclude_none=exclude_none, + ) + + +def _construct_field(value: object, field: FieldInfo, key: str) -> object: + if value is None: + return field_get_default(field) + + if PYDANTIC_V2: + type_ = field.annotation + else: + type_ = cast(type, field.outer_type_) # type: ignore + + if type_ is None: + raise RuntimeError(f"Unexpected field type is None for {key}") + + return construct_type(value=value, type_=type_) + + +def is_basemodel(type_: type) -> bool: + """Returns whether or not the given type is either a `BaseModel` or a union of `BaseModel`""" + if is_union(type_): + for variant in get_args(type_): + if is_basemodel(variant): + return True + + return False + + return is_basemodel_type(type_) + + +def is_basemodel_type(type_: type) -> TypeGuard[type[BaseModel] | type[GenericModel]]: + origin = get_origin(type_) or type_ + return issubclass(origin, BaseModel) or issubclass(origin, GenericModel) + + +def construct_type(*, value: object, type_: object) -> object: + """Loose coercion to the expected type with construction of nested values. + + If the given value does not match the expected type then it is returned as-is. 
+ """ + # we allow `object` as the input type because otherwise, passing things like + # `Literal['value']` will be reported as a type error by type checkers + type_ = cast("type[object]", type_) + + # unwrap `Annotated[T, ...]` -> `T` + if is_annotated_type(type_): + meta: tuple[Any, ...] = get_args(type_)[1:] + type_ = extract_type_arg(type_, 0) + else: + meta = tuple() + + # we need to use the origin class for any types that are subscripted generics + # e.g. Dict[str, object] + origin = get_origin(type_) or type_ + args = get_args(type_) + + if is_union(origin): + try: + return validate_type(type_=cast("type[object]", type_), value=value) + except Exception: + pass + + # if the type is a discriminated union then we want to construct the right variant + # in the union, even if the data doesn't match exactly, otherwise we'd break code + # that relies on the constructed class types, e.g. + # + # class FooType: + # kind: Literal['foo'] + # value: str + # + # class BarType: + # kind: Literal['bar'] + # value: int + # + # without this block, if the data we get is something like `{'kind': 'bar', 'value': 'foo'}` then + # we'd end up constructing `FooType` when it should be `BarType`. + discriminator = _build_discriminated_union_meta( + union=type_, meta_annotations=meta + ) + if discriminator and is_mapping(value): + variant_value = value.get( + discriminator.field_alias_from or discriminator.field_name + ) + if variant_value and isinstance(variant_value, str): + variant_type = discriminator.mapping.get(variant_value) + if variant_type: + return construct_type(type_=variant_type, value=value) + + # if the data is not valid, use the first variant that doesn't fail while deserializing + for variant in args: + try: + return construct_type(value=value, type_=variant) + except Exception: + continue + + raise RuntimeError(f"Could not convert data into a valid instance of {type_}") + + if origin == dict: + if not is_mapping(value): + return value + + _, items_type = get_args(type_) # Dict[_, items_type] + return { + key: construct_type(value=item, type_=items_type) + for key, item in value.items() + } + + if not is_literal_type(type_) and ( + issubclass(origin, BaseModel) or issubclass(origin, GenericModel) + ): + if is_list(value): + return [ + cast(Any, type_).construct(**entry) if is_mapping(entry) else entry + for entry in value + ] + + if is_mapping(value): + if issubclass(type_, BaseModel): + return type_.construct(**value) # type: ignore[arg-type] + + return cast(Any, type_).construct(**value) + + if origin == list: + if not is_list(value): + return value + + inner_type = args[0] # List[inner_type] + return [construct_type(value=entry, type_=inner_type) for entry in value] + + if origin == float: + if isinstance(value, int): + coerced = float(value) + if coerced != value: + return value + return coerced + + return value + + if type_ == datetime: + try: + return parse_datetime(value) # type: ignore + except Exception: + return value + + if type_ == date: + try: + return parse_date(value) # type: ignore + except Exception: + return value + + return value + + +@runtime_checkable +class CachedDiscriminatorType(Protocol): + __discriminator__: DiscriminatorDetails + + +class DiscriminatorDetails: + field_name: str + """The name of the discriminator field in the variant class, e.g. + + ```py + class Foo(BaseModel): + type: Literal['foo'] + ``` + + Will result in field_name='type' + """ + + field_alias_from: str | None + """The name of the discriminator field in the API response, e.g. 
+ + ```py + class Foo(BaseModel): + type: Literal['foo'] = Field(alias='type_from_api') + ``` + + Will result in field_alias_from='type_from_api' + """ + + mapping: dict[str, type] + """Mapping of discriminator value to variant type, e.g. + + {'foo': FooVariant, 'bar': BarVariant} + """ + + def __init__( + self, + *, + mapping: dict[str, type], + discriminator_field: str, + discriminator_alias: str | None, + ) -> None: + self.mapping = mapping + self.field_name = discriminator_field + self.field_alias_from = discriminator_alias + + +def _build_discriminated_union_meta( + *, union: type, meta_annotations: tuple[Any, ...] +) -> DiscriminatorDetails | None: + if isinstance(union, CachedDiscriminatorType): + return union.__discriminator__ + + discriminator_field_name: str | None = None + + for annotation in meta_annotations: + if ( + isinstance(annotation, PropertyInfo) + and annotation.discriminator is not None + ): + discriminator_field_name = annotation.discriminator + break + + if not discriminator_field_name: + return None + + mapping: dict[str, type] = {} + discriminator_alias: str | None = None + + for variant in get_args(union): + variant = strip_annotated_type(variant) + if is_basemodel_type(variant): + if PYDANTIC_V2: + field = _extract_field_schema_pv2(variant, discriminator_field_name) + if not field: + continue + + # Note: if one variant defines an alias then they all should + discriminator_alias = field.get("serialization_alias") + + field_schema = field["schema"] + + if field_schema["type"] == "literal": + for entry in field_schema["expected"]: + if isinstance(entry, str): + mapping[entry] = variant + else: + field_info = cast("dict[str, FieldInfo]", variant.__fields__).get( + discriminator_field_name + ) # pyright: ignore[reportDeprecated, reportUnnecessaryCast] + if not field_info: + continue + + # Note: if one variant defines an alias then they all should + discriminator_alias = field_info.alias + + if field_info.annotation and is_literal_type(field_info.annotation): + for entry in get_args(field_info.annotation): + if isinstance(entry, str): + mapping[entry] = variant + + if not mapping: + return None + + details = DiscriminatorDetails( + mapping=mapping, + discriminator_field=discriminator_field_name, + discriminator_alias=discriminator_alias, + ) + cast(CachedDiscriminatorType, union).__discriminator__ = details + return details + + +def _extract_field_schema_pv2( + model: type[BaseModel], field_name: str +) -> ModelField | None: + schema = model.__pydantic_core_schema__ + if schema["type"] != "model": + return None + + fields_schema = schema["schema"] + if fields_schema["type"] != "model-fields": + return None + + fields_schema = cast("ModelFieldsSchema", fields_schema) + + field = fields_schema["fields"].get(field_name) + if not field: + return None + + return cast("ModelField", field) # pyright: ignore[reportUnnecessaryCast] + + +def validate_type(*, type_: type[_T], value: object) -> _T: + """Strict validation that the given value matches the expected type""" + if inspect.isclass(type_) and issubclass(type_, pydantic.BaseModel): + return cast(_T, parse_obj(type_, value)) + + return cast(_T, _validate_non_model_type(type_=type_, value=value)) + + +# our use of subclasssing here causes weirdness for type checkers, +# so we just pretend that we don't subclass +if TYPE_CHECKING: + GenericModel = BaseModel +else: + + class GenericModel(BaseGenericModel, BaseModel): + pass + + +if PYDANTIC_V2: + from pydantic import TypeAdapter as _TypeAdapter + + _CachedTypeAdapter = cast( 
+ "TypeAdapter[object]", lru_cache(maxsize=None)(_TypeAdapter) + ) + + if TYPE_CHECKING: + from pydantic import TypeAdapter + else: + TypeAdapter = _CachedTypeAdapter + + def _validate_non_model_type(*, type_: type[_T], value: object) -> _T: + return TypeAdapter(type_).validate_python(value) + +elif not TYPE_CHECKING: # TODO: condition is weird + + class RootModel(GenericModel, Generic[_T]): + """Used as a placeholder to easily convert runtime types to a Pydantic format + to provide validation. + + For example: + ```py + validated = RootModel[int](__root__="5").__root__ + # validated: 5 + ``` + """ + + __root__: _T + + def _validate_non_model_type(*, type_: type[_T], value: object) -> _T: + model = _create_pydantic_model(type_).validate(value) + return cast(_T, model.__root__) + + def _create_pydantic_model(type_: _T) -> Type[RootModel[_T]]: + return RootModel[type_] # type: ignore + + +class FinalRequestOptionsInput(TypedDict, total=False): + method: Required[str] + url: Required[str] + params: Query + headers: Headers + max_retries: int + timeout: float | Timeout | None + files: HttpxRequestFiles | None + idempotency_key: str + json_data: Body + extra_json: AnyMapping + + +@final +class FinalRequestOptions(pydantic.BaseModel): + method: str + url: str + params: Query = {} + headers: Union[Headers, NotGiven] = NotGiven() + max_retries: Union[int, NotGiven] = NotGiven() + timeout: Union[float, Timeout, None, NotGiven] = NotGiven() + files: Union[HttpxRequestFiles, None] = None + idempotency_key: Union[str, None] = None + post_parser: Union[Callable[[Any], Any], NotGiven] = NotGiven() + + # It should be noted that we cannot use `json` here as that would override + # a BaseModel method in an incompatible fashion. + json_data: Union[Body, None] = None + extra_json: Union[AnyMapping, None] = None + + if PYDANTIC_V2: + model_config: ClassVar[ConfigDict] = ConfigDict(arbitrary_types_allowed=True) + else: + + class Config(pydantic.BaseConfig): # pyright: ignore[reportDeprecated] + arbitrary_types_allowed: bool = True + + def get_max_retries(self, max_retries: int) -> int: + if isinstance(self.max_retries, NotGiven): + return max_retries + return self.max_retries + + def _strip_raw_response_header(self) -> None: + if not is_given(self.headers): + return + + if self.headers.get(RAW_RESPONSE_HEADER): + self.headers = {**self.headers} + self.headers.pop(RAW_RESPONSE_HEADER) + + # override the `construct` method so that we can run custom transformations. 
+ # this is necessary as we don't want to do any actual runtime type checking + # (which means we can't use validators) but we do want to ensure that `NotGiven` + # values are not present + # + # type ignore required because we're adding explicit types to `**values` + @classmethod + def construct( # type: ignore + cls, + _fields_set: set[str] | None = None, + **values: Unpack[FinalRequestOptionsInput], + ) -> FinalRequestOptions: + kwargs: dict[str, Any] = { + # we unconditionally call `strip_not_given` on any value + # as it will just ignore any non-mapping types + key: strip_not_given(value) + for key, value in values.items() + } + if PYDANTIC_V2: + return super().model_construct(_fields_set, **kwargs) + return cast( + FinalRequestOptions, super().construct(_fields_set, **kwargs) + ) # pyright: ignore[reportDeprecated] + + if not TYPE_CHECKING: + # type checkers incorrectly complain about this assignment + model_construct = construct diff --git a/portkey_ai/_vendor/openai/_module_client.py b/portkey_ai/_vendor/openai/_module_client.py new file mode 100644 index 00000000..6f7356eb --- /dev/null +++ b/portkey_ai/_vendor/openai/_module_client.py @@ -0,0 +1,85 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import override + +from . import resources, _load_client +from ._utils import LazyProxy + + +class ChatProxy(LazyProxy[resources.Chat]): + @override + def __load__(self) -> resources.Chat: + return _load_client().chat + + +class BetaProxy(LazyProxy[resources.Beta]): + @override + def __load__(self) -> resources.Beta: + return _load_client().beta + + +class FilesProxy(LazyProxy[resources.Files]): + @override + def __load__(self) -> resources.Files: + return _load_client().files + + +class AudioProxy(LazyProxy[resources.Audio]): + @override + def __load__(self) -> resources.Audio: + return _load_client().audio + + +class ImagesProxy(LazyProxy[resources.Images]): + @override + def __load__(self) -> resources.Images: + return _load_client().images + + +class ModelsProxy(LazyProxy[resources.Models]): + @override + def __load__(self) -> resources.Models: + return _load_client().models + + +class BatchesProxy(LazyProxy[resources.Batches]): + @override + def __load__(self) -> resources.Batches: + return _load_client().batches + + +class EmbeddingsProxy(LazyProxy[resources.Embeddings]): + @override + def __load__(self) -> resources.Embeddings: + return _load_client().embeddings + + +class CompletionsProxy(LazyProxy[resources.Completions]): + @override + def __load__(self) -> resources.Completions: + return _load_client().completions + + +class ModerationsProxy(LazyProxy[resources.Moderations]): + @override + def __load__(self) -> resources.Moderations: + return _load_client().moderations + + +class FineTuningProxy(LazyProxy[resources.FineTuning]): + @override + def __load__(self) -> resources.FineTuning: + return _load_client().fine_tuning + + +chat: resources.Chat = ChatProxy().__as_proxied__() +beta: resources.Beta = BetaProxy().__as_proxied__() +files: resources.Files = FilesProxy().__as_proxied__() +audio: resources.Audio = AudioProxy().__as_proxied__() +images: resources.Images = ImagesProxy().__as_proxied__() +models: resources.Models = ModelsProxy().__as_proxied__() +batches: resources.Batches = BatchesProxy().__as_proxied__() +embeddings: resources.Embeddings = EmbeddingsProxy().__as_proxied__() +completions: resources.Completions = CompletionsProxy().__as_proxied__() +moderations: resources.Moderations = 
ModerationsProxy().__as_proxied__() +fine_tuning: resources.FineTuning = FineTuningProxy().__as_proxied__() diff --git a/portkey_ai/_vendor/openai/_qs.py b/portkey_ai/_vendor/openai/_qs.py new file mode 100644 index 00000000..54a98364 --- /dev/null +++ b/portkey_ai/_vendor/openai/_qs.py @@ -0,0 +1,164 @@ +from __future__ import annotations + +from typing import Any, List, Tuple, Union, Mapping, TypeVar +from urllib.parse import parse_qs, urlencode +from typing_extensions import Literal, get_args + +from ._types import NOT_GIVEN, NotGiven, NotGivenOr +from ._utils import flatten + +_T = TypeVar("_T") + + +ArrayFormat = Literal["comma", "repeat", "indices", "brackets"] +NestedFormat = Literal["dots", "brackets"] + +PrimitiveData = Union[str, int, float, bool, None] +# this should be Data = Union[PrimitiveData, "List[Data]", "Tuple[Data]", "Mapping[str, Data]"] +# https://github.com/microsoft/pyright/issues/3555 +Data = Union[PrimitiveData, List[Any], Tuple[Any], "Mapping[str, Any]"] +Params = Mapping[str, Data] + + +class Querystring: + array_format: ArrayFormat + nested_format: NestedFormat + + def __init__( + self, + *, + array_format: ArrayFormat = "repeat", + nested_format: NestedFormat = "brackets", + ) -> None: + self.array_format = array_format + self.nested_format = nested_format + + def parse(self, query: str) -> Mapping[str, object]: + # Note: custom format syntax is not supported yet + return parse_qs(query) + + def stringify( + self, + params: Params, + *, + array_format: NotGivenOr[ArrayFormat] = NOT_GIVEN, + nested_format: NotGivenOr[NestedFormat] = NOT_GIVEN, + ) -> str: + return urlencode( + self.stringify_items( + params, + array_format=array_format, + nested_format=nested_format, + ) + ) + + def stringify_items( + self, + params: Params, + *, + array_format: NotGivenOr[ArrayFormat] = NOT_GIVEN, + nested_format: NotGivenOr[NestedFormat] = NOT_GIVEN, + ) -> list[tuple[str, str]]: + opts = Options( + qs=self, + array_format=array_format, + nested_format=nested_format, + ) + return flatten( + [self._stringify_item(key, value, opts) for key, value in params.items()] + ) + + def _stringify_item( + self, + key: str, + value: Data, + opts: Options, + ) -> list[tuple[str, str]]: + if isinstance(value, Mapping): + items: list[tuple[str, str]] = [] + nested_format = opts.nested_format + for subkey, subvalue in value.items(): + items.extend( + self._stringify_item( + # TODO: error if unknown format + f"{key}.{subkey}" + if nested_format == "dots" + else f"{key}[{subkey}]", + subvalue, + opts, + ) + ) + return items + + if isinstance(value, (list, tuple)): + array_format = opts.array_format + if array_format == "comma": + return [ + ( + key, + ",".join( + self._primitive_value_to_str(item) + for item in value + if item is not None + ), + ), + ] + elif array_format == "repeat": + items = [] + for item in value: + items.extend(self._stringify_item(key, item, opts)) + return items + elif array_format == "indices": + raise NotImplementedError( + "The array indices format is not supported yet" + ) + elif array_format == "brackets": + items = [] + key = key + "[]" + for item in value: + items.extend(self._stringify_item(key, item, opts)) + return items + else: + raise NotImplementedError( + f"Unknown array_format value: {array_format}, choose from {', '.join(get_args(ArrayFormat))}" + ) + + serialised = self._primitive_value_to_str(value) + if not serialised: + return [] + return [(key, serialised)] + + def _primitive_value_to_str(self, value: PrimitiveData) -> str: + # copied from httpx + if 
value is True: + return "true" + elif value is False: + return "false" + elif value is None: + return "" + return str(value) + + +_qs = Querystring() +parse = _qs.parse +stringify = _qs.stringify +stringify_items = _qs.stringify_items + + +class Options: + array_format: ArrayFormat + nested_format: NestedFormat + + def __init__( + self, + qs: Querystring = _qs, + *, + array_format: NotGivenOr[ArrayFormat] = NOT_GIVEN, + nested_format: NotGivenOr[NestedFormat] = NOT_GIVEN, + ) -> None: + self.array_format = ( + qs.array_format if isinstance(array_format, NotGiven) else array_format + ) + self.nested_format = ( + qs.nested_format if isinstance(nested_format, NotGiven) else nested_format + ) diff --git a/portkey_ai/_vendor/openai/_resource.py b/portkey_ai/_vendor/openai/_resource.py new file mode 100644 index 00000000..fff9ba19 --- /dev/null +++ b/portkey_ai/_vendor/openai/_resource.py @@ -0,0 +1,43 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import time +from typing import TYPE_CHECKING + +import anyio + +if TYPE_CHECKING: + from ._client import OpenAI, AsyncOpenAI + + +class SyncAPIResource: + _client: OpenAI + + def __init__(self, client: OpenAI) -> None: + self._client = client + self._get = client.get + self._post = client.post + self._patch = client.patch + self._put = client.put + self._delete = client.delete + self._get_api_list = client.get_api_list + + def _sleep(self, seconds: float) -> None: + time.sleep(seconds) + + +class AsyncAPIResource: + _client: AsyncOpenAI + + def __init__(self, client: AsyncOpenAI) -> None: + self._client = client + self._get = client.get + self._post = client.post + self._patch = client.patch + self._put = client.put + self._delete = client.delete + self._get_api_list = client.get_api_list + + async def _sleep(self, seconds: float) -> None: + await anyio.sleep(seconds) diff --git a/portkey_ai/_vendor/openai/_response.py b/portkey_ai/_vendor/openai/_response.py new file mode 100644 index 00000000..22e13858 --- /dev/null +++ b/portkey_ai/_vendor/openai/_response.py @@ -0,0 +1,894 @@ +from __future__ import annotations + +import os +import inspect +import logging +import datetime +import functools +from types import TracebackType +from typing import ( + TYPE_CHECKING, + Any, + Union, + Generic, + TypeVar, + Callable, + Iterator, + AsyncIterator, + cast, + overload, +) +from typing_extensions import Awaitable, ParamSpec, override, get_origin + +import anyio +import httpx +import pydantic + +from ._types import NoneType +from ._utils import ( + is_given, + extract_type_arg, + is_annotated_type, + extract_type_var_from_base, +) +from ._models import BaseModel, is_basemodel +from ._constants import RAW_RESPONSE_HEADER, OVERRIDE_CAST_TO_HEADER +from ._streaming import ( + Stream, + AsyncStream, + is_stream_class_type, + extract_stream_chunk_type, +) +from ._exceptions import OpenAIError, APIResponseValidationError + +if TYPE_CHECKING: + from ._models import FinalRequestOptions + from ._base_client import BaseClient + + +P = ParamSpec("P") +R = TypeVar("R") +_T = TypeVar("_T") +_APIResponseT = TypeVar("_APIResponseT", bound="APIResponse[Any]") +_AsyncAPIResponseT = TypeVar("_AsyncAPIResponseT", bound="AsyncAPIResponse[Any]") + +log: logging.Logger = logging.getLogger(__name__) + + +class BaseAPIResponse(Generic[R]): + _cast_to: type[R] + _client: BaseClient[Any, Any] + _parsed_by_type: dict[type[Any], Any] + _is_sse_stream: bool + _stream_cls: type[Stream[Any]] | 
type[AsyncStream[Any]] | None + _options: FinalRequestOptions + + http_response: httpx.Response + + def __init__( + self, + *, + raw: httpx.Response, + cast_to: type[R], + client: BaseClient[Any, Any], + stream: bool, + stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None, + options: FinalRequestOptions, + ) -> None: + self._cast_to = cast_to + self._client = client + self._parsed_by_type = {} + self._is_sse_stream = stream + self._stream_cls = stream_cls + self._options = options + self.http_response = raw + + @property + def headers(self) -> httpx.Headers: + return self.http_response.headers + + @property + def http_request(self) -> httpx.Request: + """Returns the httpx Request instance associated with the current response.""" + return self.http_response.request + + @property + def status_code(self) -> int: + return self.http_response.status_code + + @property + def url(self) -> httpx.URL: + """Returns the URL for which the request was made.""" + return self.http_response.url + + @property + def method(self) -> str: + return self.http_request.method + + @property + def http_version(self) -> str: + return self.http_response.http_version + + @property + def elapsed(self) -> datetime.timedelta: + """The time taken for the complete request/response cycle to complete.""" + return self.http_response.elapsed + + @property + def is_closed(self) -> bool: + """Whether or not the response body has been closed. + + If this is False then there is response data that has not been read yet. + You must either fully consume the response body or call `.close()` + before discarding the response to prevent resource leaks. + """ + return self.http_response.is_closed + + @override + def __repr__(self) -> str: + return f"<{self.__class__.__name__} [{self.status_code} {self.http_response.reason_phrase}] type={self._cast_to}>" + + def _parse(self, *, to: type[_T] | None = None) -> R | _T: + # unwrap `Annotated[T, ...]` -> `T` + if to and is_annotated_type(to): + to = extract_type_arg(to, 0) + + if self._is_sse_stream: + if to: + if not is_stream_class_type(to): + raise TypeError( + f"Expected custom parse type to be a subclass of {Stream} or {AsyncStream}" + ) + + return cast( + _T, + to( + cast_to=extract_stream_chunk_type( + to, + failure_message="Expected custom stream type to be passed with a type argument, e.g. 
Stream[ChunkType]", + ), + response=self.http_response, + client=cast(Any, self._client), + ), + ) + + if self._stream_cls: + return cast( + R, + self._stream_cls( + cast_to=extract_stream_chunk_type(self._stream_cls), + response=self.http_response, + client=cast(Any, self._client), + ), + ) + + stream_cls = cast( + "type[Stream[Any]] | type[AsyncStream[Any]] | None", + self._client._default_stream_cls, + ) + if stream_cls is None: + raise MissingStreamClassError() + + return cast( + R, + stream_cls( + cast_to=self._cast_to, + response=self.http_response, + client=cast(Any, self._client), + ), + ) + + cast_to = to if to is not None else self._cast_to + + # unwrap `Annotated[T, ...]` -> `T` + if is_annotated_type(cast_to): + cast_to = extract_type_arg(cast_to, 0) + + if cast_to is NoneType: + return cast(R, None) + + response = self.http_response + if cast_to == str: + return cast(R, response.text) + + if cast_to == bytes: + return cast(R, response.content) + + if cast_to == int: + return cast(R, int(response.text)) + + if cast_to == float: + return cast(R, float(response.text)) + + origin = get_origin(cast_to) or cast_to + + # handle the legacy binary response case + if ( + inspect.isclass(cast_to) + and cast_to.__name__ == "HttpxBinaryResponseContent" + ): + return cast(R, cast_to(response)) # type: ignore + + if origin == APIResponse: + raise RuntimeError("Unexpected state - cast_to is `APIResponse`") + + if inspect.isclass(origin) and issubclass(origin, httpx.Response): + # Because of the invariance of our ResponseT TypeVar, users can subclass httpx.Response + # and pass that class to our request functions. We cannot change the variance to be either + # covariant or contravariant as that makes our usage of ResponseT illegal. We could construct + # the response class ourselves but that is something that should be supported directly in httpx + # as it would be easy to incorrectly construct the Response object due to the multitude of arguments. + if cast_to != httpx.Response: + raise ValueError( + f"Subclasses of httpx.Response cannot be passed to `cast_to`" + ) + return cast(R, response) + + if ( + inspect.isclass(origin) + and not issubclass(origin, BaseModel) + and issubclass(origin, pydantic.BaseModel) + ): + raise TypeError( + "Pydantic models must subclass our base model type, e.g. `from openai import BaseModel`" + ) + + if ( + cast_to is not object + and not origin is list + and not origin is dict + and not origin is Union + and not issubclass(origin, BaseModel) + ): + raise RuntimeError( + f"Unsupported type, expected {cast_to} to be a subclass of {BaseModel}, {dict}, {list}, {Union}, {NoneType}, {str} or {httpx.Response}." + ) + + # split is required to handle cases where additional information is included + # in the response, e.g. 
application/json; charset=utf-8 + content_type, *_ = response.headers.get("content-type", "*").split(";") + if content_type != "application/json": + if is_basemodel(cast_to): + try: + data = response.json() + except Exception as exc: + log.debug( + "Could not read JSON from response data due to %s - %s", + type(exc), + exc, + ) + else: + return self._client._process_response_data( + data=data, + cast_to=cast_to, # type: ignore + response=response, + ) + + if self._client._strict_response_validation: + raise APIResponseValidationError( + response=response, + message=f"Expected Content-Type response header to be `application/json` but received `{content_type}` instead.", + body=response.text, + ) + + # If the API responds with content that isn't JSON then we just return + # the (decoded) text without performing any parsing so that you can still + # handle the response however you need to. + return response.text # type: ignore + + data = response.json() + + return self._client._process_response_data( + data=data, + cast_to=cast_to, # type: ignore + response=response, + ) + + +class APIResponse(BaseAPIResponse[R]): + @property + def request_id(self) -> str | None: + return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return] + + @overload + def parse(self, *, to: type[_T]) -> _T: + ... + + @overload + def parse(self) -> R: + ... + + def parse(self, *, to: type[_T] | None = None) -> R | _T: + """Returns the rich python representation of this response's data. + + For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`. + + You can customise the type that the response is parsed into through + the `to` argument, e.g. + + ```py + from openai import BaseModel + + + class MyModel(BaseModel): + foo: str + + + obj = response.parse(to=MyModel) + print(obj.foo) + ``` + + We support parsing: + - `BaseModel` + - `dict` + - `list` + - `Union` + - `str` + - `int` + - `float` + - `httpx.Response` + """ + cache_key = to if to is not None else self._cast_to + cached = self._parsed_by_type.get(cache_key) + if cached is not None: + return cached # type: ignore[no-any-return] + + if not self._is_sse_stream: + self.read() + + parsed = self._parse(to=to) + if is_given(self._options.post_parser): + parsed = self._options.post_parser(parsed) + + self._parsed_by_type[cache_key] = parsed + return parsed + + def read(self) -> bytes: + """Read and return the binary response content.""" + try: + return self.http_response.read() + except httpx.StreamConsumed as exc: + # The default error raised by httpx isn't very + # helpful in our case so we re-raise it with + # a different error message. + raise StreamAlreadyConsumed() from exc + + def text(self) -> str: + """Read and decode the response content into a string.""" + self.read() + return self.http_response.text + + def json(self) -> object: + """Read and decode the JSON response content.""" + self.read() + return self.http_response.json() + + def close(self) -> None: + """Close the response and release the connection. + + Automatically called if the response body is read to completion. + """ + self.http_response.close() + + def iter_bytes(self, chunk_size: int | None = None) -> Iterator[bytes]: + """ + A byte-iterator over the decoded response content. + + This automatically handles gzip, deflate and brotli encoded responses. 
+ """ + for chunk in self.http_response.iter_bytes(chunk_size): + yield chunk + + def iter_text(self, chunk_size: int | None = None) -> Iterator[str]: + """A str-iterator over the decoded response content + that handles both gzip, deflate, etc but also detects the content's + string encoding. + """ + for chunk in self.http_response.iter_text(chunk_size): + yield chunk + + def iter_lines(self) -> Iterator[str]: + """Like `iter_text()` but will only yield chunks for each line""" + for chunk in self.http_response.iter_lines(): + yield chunk + + +class AsyncAPIResponse(BaseAPIResponse[R]): + @property + def request_id(self) -> str | None: + return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return] + + @overload + async def parse(self, *, to: type[_T]) -> _T: + ... + + @overload + async def parse(self) -> R: + ... + + async def parse(self, *, to: type[_T] | None = None) -> R | _T: + """Returns the rich python representation of this response's data. + + For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`. + + You can customise the type that the response is parsed into through + the `to` argument, e.g. + + ```py + from openai import BaseModel + + + class MyModel(BaseModel): + foo: str + + + obj = response.parse(to=MyModel) + print(obj.foo) + ``` + + We support parsing: + - `BaseModel` + - `dict` + - `list` + - `Union` + - `str` + - `httpx.Response` + """ + cache_key = to if to is not None else self._cast_to + cached = self._parsed_by_type.get(cache_key) + if cached is not None: + return cached # type: ignore[no-any-return] + + if not self._is_sse_stream: + await self.read() + + parsed = self._parse(to=to) + if is_given(self._options.post_parser): + parsed = self._options.post_parser(parsed) + + self._parsed_by_type[cache_key] = parsed + return parsed + + async def read(self) -> bytes: + """Read and return the binary response content.""" + try: + return await self.http_response.aread() + except httpx.StreamConsumed as exc: + # the default error raised by httpx isn't very + # helpful in our case so we re-raise it with + # a different error message + raise StreamAlreadyConsumed() from exc + + async def text(self) -> str: + """Read and decode the response content into a string.""" + await self.read() + return self.http_response.text + + async def json(self) -> object: + """Read and decode the JSON response content.""" + await self.read() + return self.http_response.json() + + async def close(self) -> None: + """Close the response and release the connection. + + Automatically called if the response body is read to completion. + """ + await self.http_response.aclose() + + async def iter_bytes(self, chunk_size: int | None = None) -> AsyncIterator[bytes]: + """ + A byte-iterator over the decoded response content. + + This automatically handles gzip, deflate and brotli encoded responses. + """ + async for chunk in self.http_response.aiter_bytes(chunk_size): + yield chunk + + async def iter_text(self, chunk_size: int | None = None) -> AsyncIterator[str]: + """A str-iterator over the decoded response content + that handles both gzip, deflate, etc but also detects the content's + string encoding. 
+ """ + async for chunk in self.http_response.aiter_text(chunk_size): + yield chunk + + async def iter_lines(self) -> AsyncIterator[str]: + """Like `iter_text()` but will only yield chunks for each line""" + async for chunk in self.http_response.aiter_lines(): + yield chunk + + +class BinaryAPIResponse(APIResponse[bytes]): + """Subclass of APIResponse providing helpers for dealing with binary data. + + Note: If you want to stream the response data instead of eagerly reading it + all at once then you should use `.with_streaming_response` when making + the API request, e.g. `.with_streaming_response.get_binary_response()` + """ + + def write_to_file( + self, + file: str | os.PathLike[str], + ) -> None: + """Write the output to the given file. + + Accepts a filename or any path-like object, e.g. pathlib.Path + + Note: if you want to stream the data to the file instead of writing + all at once then you should use `.with_streaming_response` when making + the API request, e.g. `.with_streaming_response.get_binary_response()` + """ + with open(file, mode="wb") as f: + for data in self.iter_bytes(): + f.write(data) + + +class AsyncBinaryAPIResponse(AsyncAPIResponse[bytes]): + """Subclass of APIResponse providing helpers for dealing with binary data. + + Note: If you want to stream the response data instead of eagerly reading it + all at once then you should use `.with_streaming_response` when making + the API request, e.g. `.with_streaming_response.get_binary_response()` + """ + + async def write_to_file( + self, + file: str | os.PathLike[str], + ) -> None: + """Write the output to the given file. + + Accepts a filename or any path-like object, e.g. pathlib.Path + + Note: if you want to stream the data to the file instead of writing + all at once then you should use `.with_streaming_response` when making + the API request, e.g. `.with_streaming_response.get_binary_response()` + """ + path = anyio.Path(file) + async with await path.open(mode="wb") as f: + async for data in self.iter_bytes(): + await f.write(data) + + +class StreamedBinaryAPIResponse(APIResponse[bytes]): + def stream_to_file( + self, + file: str | os.PathLike[str], + *, + chunk_size: int | None = None, + ) -> None: + """Streams the output to the given file. + + Accepts a filename or any path-like object, e.g. pathlib.Path + """ + with open(file, mode="wb") as f: + for data in self.iter_bytes(chunk_size): + f.write(data) + + +class AsyncStreamedBinaryAPIResponse(AsyncAPIResponse[bytes]): + async def stream_to_file( + self, + file: str | os.PathLike[str], + *, + chunk_size: int | None = None, + ) -> None: + """Streams the output to the given file. + + Accepts a filename or any path-like object, e.g. pathlib.Path + """ + path = anyio.Path(file) + async with await path.open(mode="wb") as f: + async for data in self.iter_bytes(chunk_size): + await f.write(data) + + +class MissingStreamClassError(TypeError): + def __init__(self) -> None: + super().__init__( + "The `stream` argument was set to `True` but the `stream_cls` argument was not given. See `openai._streaming` for reference", + ) + + +class StreamAlreadyConsumed(OpenAIError): + """ + Attempted to read or stream content, but the content has already + been streamed. + + This can happen if you use a method like `.iter_lines()` and then attempt + to read th entire response body afterwards, e.g. + + ```py + response = await client.post(...) + async for line in response.iter_lines(): + ... 
# do something with `line` + + content = await response.read() + # ^ error + ``` + + If you want this behaviour you'll need to either manually accumulate the response + content or call `await response.read()` before iterating over the stream. + """ + + def __init__(self) -> None: + message = ( + "Attempted to read or stream some content, but the content has " + "already been streamed. " + "This could be due to attempting to stream the response " + "content more than once." + "\n\n" + "You can fix this by manually accumulating the response content while streaming " + "or by calling `.read()` before starting to stream." + ) + super().__init__(message) + + +class ResponseContextManager(Generic[_APIResponseT]): + """Context manager for ensuring that a request is not made + until it is entered and that the response will always be closed + when the context manager exits + """ + + def __init__(self, request_func: Callable[[], _APIResponseT]) -> None: + self._request_func = request_func + self.__response: _APIResponseT | None = None + + def __enter__(self) -> _APIResponseT: + self.__response = self._request_func() + return self.__response + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + if self.__response is not None: + self.__response.close() + + +class AsyncResponseContextManager(Generic[_AsyncAPIResponseT]): + """Context manager for ensuring that a request is not made + until it is entered and that the response will always be closed + when the context manager exits + """ + + def __init__(self, api_request: Awaitable[_AsyncAPIResponseT]) -> None: + self._api_request = api_request + self.__response: _AsyncAPIResponseT | None = None + + async def __aenter__(self) -> _AsyncAPIResponseT: + self.__response = await self._api_request + return self.__response + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + if self.__response is not None: + await self.__response.close() + + +def to_streamed_response_wrapper( + func: Callable[P, R] +) -> Callable[P, ResponseContextManager[APIResponse[R]]]: + """Higher order function that takes one of our bound API methods and wraps it + to support streaming and returning the raw `APIResponse` object directly. + """ + + @functools.wraps(func) + def wrapped( + *args: P.args, **kwargs: P.kwargs + ) -> ResponseContextManager[APIResponse[R]]: + extra_headers: dict[str, str] = { + **(cast(Any, kwargs.get("extra_headers")) or {}) + } + extra_headers[RAW_RESPONSE_HEADER] = "stream" + + kwargs["extra_headers"] = extra_headers + + make_request = functools.partial(func, *args, **kwargs) + + return ResponseContextManager(cast(Callable[[], APIResponse[R]], make_request)) + + return wrapped + + +def async_to_streamed_response_wrapper( + func: Callable[P, Awaitable[R]], +) -> Callable[P, AsyncResponseContextManager[AsyncAPIResponse[R]]]: + """Higher order function that takes one of our bound API methods and wraps it + to support streaming and returning the raw `APIResponse` object directly. 
+ """ + + @functools.wraps(func) + def wrapped( + *args: P.args, **kwargs: P.kwargs + ) -> AsyncResponseContextManager[AsyncAPIResponse[R]]: + extra_headers: dict[str, str] = { + **(cast(Any, kwargs.get("extra_headers")) or {}) + } + extra_headers[RAW_RESPONSE_HEADER] = "stream" + + kwargs["extra_headers"] = extra_headers + + make_request = func(*args, **kwargs) + + return AsyncResponseContextManager( + cast(Awaitable[AsyncAPIResponse[R]], make_request) + ) + + return wrapped + + +def to_custom_streamed_response_wrapper( + func: Callable[P, object], + response_cls: type[_APIResponseT], +) -> Callable[P, ResponseContextManager[_APIResponseT]]: + """Higher order function that takes one of our bound API methods and an `APIResponse` class + and wraps the method to support streaming and returning the given response class directly. + + Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])` + """ + + @functools.wraps(func) + def wrapped( + *args: P.args, **kwargs: P.kwargs + ) -> ResponseContextManager[_APIResponseT]: + extra_headers: dict[str, Any] = { + **(cast(Any, kwargs.get("extra_headers")) or {}) + } + extra_headers[RAW_RESPONSE_HEADER] = "stream" + extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls + + kwargs["extra_headers"] = extra_headers + + make_request = functools.partial(func, *args, **kwargs) + + return ResponseContextManager(cast(Callable[[], _APIResponseT], make_request)) + + return wrapped + + +def async_to_custom_streamed_response_wrapper( + func: Callable[P, Awaitable[object]], + response_cls: type[_AsyncAPIResponseT], +) -> Callable[P, AsyncResponseContextManager[_AsyncAPIResponseT]]: + """Higher order function that takes one of our bound API methods and an `APIResponse` class + and wraps the method to support streaming and returning the given response class directly. + + Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])` + """ + + @functools.wraps(func) + def wrapped( + *args: P.args, **kwargs: P.kwargs + ) -> AsyncResponseContextManager[_AsyncAPIResponseT]: + extra_headers: dict[str, Any] = { + **(cast(Any, kwargs.get("extra_headers")) or {}) + } + extra_headers[RAW_RESPONSE_HEADER] = "stream" + extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls + + kwargs["extra_headers"] = extra_headers + + make_request = func(*args, **kwargs) + + return AsyncResponseContextManager( + cast(Awaitable[_AsyncAPIResponseT], make_request) + ) + + return wrapped + + +def to_raw_response_wrapper(func: Callable[P, R]) -> Callable[P, APIResponse[R]]: + """Higher order function that takes one of our bound API methods and wraps it + to support returning the raw `APIResponse` object directly. + """ + + @functools.wraps(func) + def wrapped(*args: P.args, **kwargs: P.kwargs) -> APIResponse[R]: + extra_headers: dict[str, str] = { + **(cast(Any, kwargs.get("extra_headers")) or {}) + } + extra_headers[RAW_RESPONSE_HEADER] = "raw" + + kwargs["extra_headers"] = extra_headers + + return cast(APIResponse[R], func(*args, **kwargs)) + + return wrapped + + +def async_to_raw_response_wrapper( + func: Callable[P, Awaitable[R]] +) -> Callable[P, Awaitable[AsyncAPIResponse[R]]]: + """Higher order function that takes one of our bound API methods and wraps it + to support returning the raw `APIResponse` object directly. 
+ """ + + @functools.wraps(func) + async def wrapped(*args: P.args, **kwargs: P.kwargs) -> AsyncAPIResponse[R]: + extra_headers: dict[str, str] = { + **(cast(Any, kwargs.get("extra_headers")) or {}) + } + extra_headers[RAW_RESPONSE_HEADER] = "raw" + + kwargs["extra_headers"] = extra_headers + + return cast(AsyncAPIResponse[R], await func(*args, **kwargs)) + + return wrapped + + +def to_custom_raw_response_wrapper( + func: Callable[P, object], + response_cls: type[_APIResponseT], +) -> Callable[P, _APIResponseT]: + """Higher order function that takes one of our bound API methods and an `APIResponse` class + and wraps the method to support returning the given response class directly. + + Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])` + """ + + @functools.wraps(func) + def wrapped(*args: P.args, **kwargs: P.kwargs) -> _APIResponseT: + extra_headers: dict[str, Any] = { + **(cast(Any, kwargs.get("extra_headers")) or {}) + } + extra_headers[RAW_RESPONSE_HEADER] = "raw" + extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls + + kwargs["extra_headers"] = extra_headers + + return cast(_APIResponseT, func(*args, **kwargs)) + + return wrapped + + +def async_to_custom_raw_response_wrapper( + func: Callable[P, Awaitable[object]], + response_cls: type[_AsyncAPIResponseT], +) -> Callable[P, Awaitable[_AsyncAPIResponseT]]: + """Higher order function that takes one of our bound API methods and an `APIResponse` class + and wraps the method to support returning the given response class directly. + + Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])` + """ + + @functools.wraps(func) + def wrapped(*args: P.args, **kwargs: P.kwargs) -> Awaitable[_AsyncAPIResponseT]: + extra_headers: dict[str, Any] = { + **(cast(Any, kwargs.get("extra_headers")) or {}) + } + extra_headers[RAW_RESPONSE_HEADER] = "raw" + extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls + + kwargs["extra_headers"] = extra_headers + + return cast(Awaitable[_AsyncAPIResponseT], func(*args, **kwargs)) + + return wrapped + + +def extract_response_type(typ: type[BaseAPIResponse[Any]]) -> type: + """Given a type like `APIResponse[T]`, returns the generic type variable `T`. + + This also handles the case where a concrete subclass is given, e.g. + ```py + class MyResponse(APIResponse[bytes]): + ... 
+ + extract_response_type(MyResponse) -> bytes + ``` + """ + return extract_type_var_from_base( + typ, + generic_bases=cast( + "tuple[type, ...]", (BaseAPIResponse, APIResponse, AsyncAPIResponse) + ), + index=0, + ) diff --git a/portkey_ai/_vendor/openai/_streaming.py b/portkey_ai/_vendor/openai/_streaming.py new file mode 100644 index 00000000..20c7ecb6 --- /dev/null +++ b/portkey_ai/_vendor/openai/_streaming.py @@ -0,0 +1,438 @@ +# Note: initially copied from https://github.com/florimondmanca/httpx-sse/blob/master/src/httpx_sse/_decoders.py +from __future__ import annotations + +import json +import inspect +from types import TracebackType +from typing import TYPE_CHECKING, Any, Generic, TypeVar, Iterator, AsyncIterator, cast +from typing_extensions import ( + Self, + Protocol, + TypeGuard, + override, + get_origin, + runtime_checkable, +) + +import httpx + +from ._utils import is_mapping, extract_type_var_from_base +from ._exceptions import APIError + +if TYPE_CHECKING: + from ._client import OpenAI, AsyncOpenAI + + +_T = TypeVar("_T") + + +class Stream(Generic[_T]): + """Provides the core interface to iterate over a synchronous stream response.""" + + response: httpx.Response + + _decoder: SSEBytesDecoder + + def __init__( + self, + *, + cast_to: type[_T], + response: httpx.Response, + client: OpenAI, + ) -> None: + self.response = response + self._cast_to = cast_to + self._client = client + self._decoder = client._make_sse_decoder() + self._iterator = self.__stream__() + + def __next__(self) -> _T: + return self._iterator.__next__() + + def __iter__(self) -> Iterator[_T]: + for item in self._iterator: + yield item + + def _iter_events(self) -> Iterator[ServerSentEvent]: + yield from self._decoder.iter_bytes(self.response.iter_bytes()) + + def __stream__(self) -> Iterator[_T]: + cast_to = cast(Any, self._cast_to) + response = self.response + process_data = self._client._process_response_data + iterator = self._iter_events() + + for sse in iterator: + if sse.data.startswith("[DONE]"): + break + + if sse.event is None: + data = sse.json() + if is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + + raise APIError( + message=message, + request=self.response.request, + body=data["error"], + ) + + yield process_data(data=data, cast_to=cast_to, response=response) + + else: + data = sse.json() + + if sse.event == "error" and is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + + raise APIError( + message=message, + request=self.response.request, + body=data["error"], + ) + + yield process_data( + data={"data": data, "event": sse.event}, + cast_to=cast_to, + response=response, + ) + + # Ensure the entire stream is consumed + for _sse in iterator: + ... + + def __enter__(self) -> Self: + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + self.close() + + def close(self) -> None: + """ + Close the response and release the connection. + + Automatically called if the response body is read to completion. 
+ """ + self.response.close() + + +class AsyncStream(Generic[_T]): + """Provides the core interface to iterate over an asynchronous stream response.""" + + response: httpx.Response + + _decoder: SSEDecoder | SSEBytesDecoder + + def __init__( + self, + *, + cast_to: type[_T], + response: httpx.Response, + client: AsyncOpenAI, + ) -> None: + self.response = response + self._cast_to = cast_to + self._client = client + self._decoder = client._make_sse_decoder() + self._iterator = self.__stream__() + + async def __anext__(self) -> _T: + return await self._iterator.__anext__() + + async def __aiter__(self) -> AsyncIterator[_T]: + async for item in self._iterator: + yield item + + async def _iter_events(self) -> AsyncIterator[ServerSentEvent]: + async for sse in self._decoder.aiter_bytes(self.response.aiter_bytes()): + yield sse + + async def __stream__(self) -> AsyncIterator[_T]: + cast_to = cast(Any, self._cast_to) + response = self.response + process_data = self._client._process_response_data + iterator = self._iter_events() + + async for sse in iterator: + if sse.data.startswith("[DONE]"): + break + + if sse.event is None: + data = sse.json() + if is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + + raise APIError( + message=message, + request=self.response.request, + body=data["error"], + ) + + yield process_data(data=data, cast_to=cast_to, response=response) + + else: + data = sse.json() + + if sse.event == "error" and is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + + raise APIError( + message=message, + request=self.response.request, + body=data["error"], + ) + + yield process_data( + data={"data": data, "event": sse.event}, + cast_to=cast_to, + response=response, + ) + + # Ensure the entire stream is consumed + async for _sse in iterator: + ... + + async def __aenter__(self) -> Self: + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + await self.close() + + async def close(self) -> None: + """ + Close the response and release the connection. + + Automatically called if the response body is read to completion. 
+ """ + await self.response.aclose() + + +class ServerSentEvent: + def __init__( + self, + *, + event: str | None = None, + data: str | None = None, + id: str | None = None, + retry: int | None = None, + ) -> None: + if data is None: + data = "" + + self._id = id + self._data = data + self._event = event or None + self._retry = retry + + @property + def event(self) -> str | None: + return self._event + + @property + def id(self) -> str | None: + return self._id + + @property + def retry(self) -> int | None: + return self._retry + + @property + def data(self) -> str: + return self._data + + def json(self) -> Any: + return json.loads(self.data) + + @override + def __repr__(self) -> str: + return f"ServerSentEvent(event={self.event}, data={self.data}, id={self.id}, retry={self.retry})" + + +class SSEDecoder: + _data: list[str] + _event: str | None + _retry: int | None + _last_event_id: str | None + + def __init__(self) -> None: + self._event = None + self._data = [] + self._last_event_id = None + self._retry = None + + def iter_bytes(self, iterator: Iterator[bytes]) -> Iterator[ServerSentEvent]: + """Given an iterator that yields raw binary data, iterate over it & yield every event encountered""" + for chunk in self._iter_chunks(iterator): + # Split before decoding so splitlines() only uses \r and \n + for raw_line in chunk.splitlines(): + line = raw_line.decode("utf-8") + sse = self.decode(line) + if sse: + yield sse + + def _iter_chunks(self, iterator: Iterator[bytes]) -> Iterator[bytes]: + """Given an iterator that yields raw binary data, iterate over it and yield individual SSE chunks""" + data = b"" + for chunk in iterator: + for line in chunk.splitlines(keepends=True): + data += line + if data.endswith((b"\r\r", b"\n\n", b"\r\n\r\n")): + yield data + data = b"" + if data: + yield data + + async def aiter_bytes( + self, iterator: AsyncIterator[bytes] + ) -> AsyncIterator[ServerSentEvent]: + """Given an iterator that yields raw binary data, iterate over it & yield every event encountered""" + async for chunk in self._aiter_chunks(iterator): + # Split before decoding so splitlines() only uses \r and \n + for raw_line in chunk.splitlines(): + line = raw_line.decode("utf-8") + sse = self.decode(line) + if sse: + yield sse + + async def _aiter_chunks( + self, iterator: AsyncIterator[bytes] + ) -> AsyncIterator[bytes]: + """Given an iterator that yields raw binary data, iterate over it and yield individual SSE chunks""" + data = b"" + async for chunk in iterator: + for line in chunk.splitlines(keepends=True): + data += line + if data.endswith((b"\r\r", b"\n\n", b"\r\n\r\n")): + yield data + data = b"" + if data: + yield data + + def decode(self, line: str) -> ServerSentEvent | None: + # See: https://html.spec.whatwg.org/multipage/server-sent-events.html#event-stream-interpretation # noqa: E501 + + if not line: + if ( + not self._event + and not self._data + and not self._last_event_id + and self._retry is None + ): + return None + + sse = ServerSentEvent( + event=self._event, + data="\n".join(self._data), + id=self._last_event_id, + retry=self._retry, + ) + + # NOTE: as per the SSE spec, do not reset last_event_id. 
+ self._event = None + self._data = [] + self._retry = None + + return sse + + if line.startswith(":"): + return None + + fieldname, _, value = line.partition(":") + + if value.startswith(" "): + value = value[1:] + + if fieldname == "event": + self._event = value + elif fieldname == "data": + self._data.append(value) + elif fieldname == "id": + if "\0" in value: + pass + else: + self._last_event_id = value + elif fieldname == "retry": + try: + self._retry = int(value) + except (TypeError, ValueError): + pass + else: + pass # Field is ignored. + + return None + + +@runtime_checkable +class SSEBytesDecoder(Protocol): + def iter_bytes(self, iterator: Iterator[bytes]) -> Iterator[ServerSentEvent]: + """Given an iterator that yields raw binary data, iterate over it & yield every event encountered""" + ... + + def aiter_bytes( + self, iterator: AsyncIterator[bytes] + ) -> AsyncIterator[ServerSentEvent]: + """Given an async iterator that yields raw binary data, iterate over it & yield every event encountered""" + ... + + +def is_stream_class_type( + typ: type, +) -> TypeGuard[type[Stream[object]] | type[AsyncStream[object]]]: + """TypeGuard for determining whether or not the given type is a subclass of `Stream` / `AsyncStream`""" + origin = get_origin(typ) or typ + return inspect.isclass(origin) and issubclass(origin, (Stream, AsyncStream)) + + +def extract_stream_chunk_type( + stream_cls: type, + *, + failure_message: str | None = None, +) -> type: + """Given a type like `Stream[T]`, returns the generic type variable `T`. + + This also handles the case where a concrete subclass is given, e.g. + ```py + class MyStream(Stream[bytes]): + ... + + extract_stream_chunk_type(MyStream) -> bytes + ``` + """ + from ._base_client import Stream, AsyncStream + + return extract_type_var_from_base( + stream_cls, + index=0, + generic_bases=cast("tuple[type, ...]", (Stream, AsyncStream)), + failure_message=failure_message, + ) diff --git a/portkey_ai/_vendor/openai/_types.py b/portkey_ai/_vendor/openai/_types.py new file mode 100644 index 00000000..d84d9da4 --- /dev/null +++ b/portkey_ai/_vendor/openai/_types.py @@ -0,0 +1,233 @@ +from __future__ import annotations + +from os import PathLike +from typing import ( + IO, + TYPE_CHECKING, + Any, + Dict, + List, + Type, + Tuple, + Union, + Mapping, + TypeVar, + Callable, + Optional, + Sequence, +) +from typing_extensions import ( + Literal, + Protocol, + TypeAlias, + TypedDict, + override, + runtime_checkable, +) + +import httpx +import pydantic +from httpx import URL, Proxy, Timeout, Response, BaseTransport, AsyncBaseTransport + +if TYPE_CHECKING: + from ._models import BaseModel + from ._response import APIResponse, AsyncAPIResponse + from ._legacy_response import HttpxBinaryResponseContent + +Transport = BaseTransport +AsyncTransport = AsyncBaseTransport +Query = Mapping[str, object] +Body = object +AnyMapping = Mapping[str, object] +ModelT = TypeVar("ModelT", bound=pydantic.BaseModel) +_T = TypeVar("_T") + + +# Approximates httpx internal ProxiesTypes and RequestFiles types +# while adding support for `PathLike` instances +ProxiesDict = Dict["str | URL", Union[None, str, URL, Proxy]] +ProxiesTypes = Union[str, Proxy, ProxiesDict] +if TYPE_CHECKING: + Base64FileInput = Union[IO[bytes], PathLike[str]] + FileContent = Union[IO[bytes], bytes, PathLike[str]] +else: + Base64FileInput = Union[IO[bytes], PathLike] + FileContent = Union[ + IO[bytes], bytes, PathLike + ] # PathLike is not subscriptable in Python 3.8. 
+FileTypes = Union[ + # file (or bytes) + FileContent, + # (filename, file (or bytes)) + Tuple[Optional[str], FileContent], + # (filename, file (or bytes), content_type) + Tuple[Optional[str], FileContent, Optional[str]], + # (filename, file (or bytes), content_type, headers) + Tuple[Optional[str], FileContent, Optional[str], Mapping[str, str]], +] +RequestFiles = Union[Mapping[str, FileTypes], Sequence[Tuple[str, FileTypes]]] + +# duplicate of the above but without our custom file support +HttpxFileContent = Union[IO[bytes], bytes] +HttpxFileTypes = Union[ + # file (or bytes) + HttpxFileContent, + # (filename, file (or bytes)) + Tuple[Optional[str], HttpxFileContent], + # (filename, file (or bytes), content_type) + Tuple[Optional[str], HttpxFileContent, Optional[str]], + # (filename, file (or bytes), content_type, headers) + Tuple[Optional[str], HttpxFileContent, Optional[str], Mapping[str, str]], +] +HttpxRequestFiles = Union[ + Mapping[str, HttpxFileTypes], Sequence[Tuple[str, HttpxFileTypes]] +] + +# Workaround to support (cast_to: Type[ResponseT]) -> ResponseT +# where ResponseT includes `None`. In order to support directly +# passing `None`, overloads would have to be defined for every +# method that uses `ResponseT` which would lead to an unacceptable +# amount of code duplication and make it unreadable. See _base_client.py +# for example usage. +# +# This unfortunately means that you will either have +# to import this type and pass it explicitly: +# +# from openai import NoneType +# client.get('/foo', cast_to=NoneType) +# +# or build it yourself: +# +# client.get('/foo', cast_to=type(None)) +if TYPE_CHECKING: + NoneType: Type[None] +else: + NoneType = type(None) + + +class RequestOptions(TypedDict, total=False): + headers: Headers + max_retries: int + timeout: float | Timeout | None + params: Query + extra_json: AnyMapping + idempotency_key: str + + +# Sentinel class used until PEP 0661 is accepted +class NotGiven: + """ + A sentinel singleton class used to distinguish omitted keyword arguments + from those passed in with the value None (which may have different behavior). + + For example: + + ```py + def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: + ... + + + get(timeout=1) # 1s timeout + get(timeout=None) # No timeout + get() # Default timeout behavior, which may not be statically known at the method definition. 
+ ``` + """ + + def __bool__(self) -> Literal[False]: + return False + + @override + def __repr__(self) -> str: + return "NOT_GIVEN" + + +NotGivenOr = Union[_T, NotGiven] +NOT_GIVEN = NotGiven() + + +class Omit: + """In certain situations you need to be able to represent a case where a default value has + to be explicitly removed and `None` is not an appropriate substitute, for example: + + ```py + # as the default `Content-Type` header is `application/json` that will be sent + client.post("/upload/files", files={"file": b"my raw file content"}) + + # you can't explicitly override the header as it has to be dynamically generated + # to look something like: 'multipart/form-data; boundary=0d8382fcf5f8c3be01ca2e11002d2983' + client.post(..., headers={"Content-Type": "multipart/form-data"}) + + # instead you can remove the default `application/json` header by passing Omit + client.post(..., headers={"Content-Type": Omit()}) + ``` + """ + + def __bool__(self) -> Literal[False]: + return False + + +@runtime_checkable +class ModelBuilderProtocol(Protocol): + @classmethod + def build( + cls: type[_T], + *, + response: Response, + data: object, + ) -> _T: + ... + + +Headers = Mapping[str, Union[str, Omit]] + + +class HeadersLikeProtocol(Protocol): + def get(self, __key: str) -> str | None: + ... + + +HeadersLike = Union[Headers, HeadersLikeProtocol] + +ResponseT = TypeVar( + "ResponseT", + bound=Union[ + object, + str, + None, + "BaseModel", + List[Any], + Dict[str, Any], + Response, + ModelBuilderProtocol, + "APIResponse[Any]", + "AsyncAPIResponse[Any]", + "HttpxBinaryResponseContent", + ], +) + +StrBytesIntFloat = Union[str, bytes, int, float] + +# Note: copied from Pydantic +# https://github.com/pydantic/pydantic/blob/32ea570bf96e84234d2992e1ddf40ab8a565925a/pydantic/main.py#L49 +IncEx: TypeAlias = "set[int] | set[str] | dict[int, Any] | dict[str, Any] | None" + +PostParser = Callable[[Any], Any] + + +@runtime_checkable +class InheritsGeneric(Protocol): + """Represents a type that has inherited from `Generic` + + The `__orig_bases__` property can be used to determine the resolved + type variable for a given base class. 
+ """ + + __orig_bases__: tuple[_GenericAlias] + + +class _GenericAlias(Protocol): + __origin__: type[object] + + +class HttpxSendArgs(TypedDict, total=False): + auth: httpx.Auth diff --git a/portkey_ai/_vendor/openai/_utils/__init__.py b/portkey_ai/_vendor/openai/_utils/__init__.py new file mode 100644 index 00000000..475bdac2 --- /dev/null +++ b/portkey_ai/_vendor/openai/_utils/__init__.py @@ -0,0 +1,54 @@ +from ._sync import asyncify as asyncify +from ._proxy import LazyProxy as LazyProxy +from ._utils import ( + flatten as flatten, + is_dict as is_dict, + is_list as is_list, + is_given as is_given, + is_tuple as is_tuple, + lru_cache as lru_cache, + is_mapping as is_mapping, + is_tuple_t as is_tuple_t, + parse_date as parse_date, + is_iterable as is_iterable, + is_sequence as is_sequence, + coerce_float as coerce_float, + is_mapping_t as is_mapping_t, + removeprefix as removeprefix, + removesuffix as removesuffix, + extract_files as extract_files, + is_sequence_t as is_sequence_t, + required_args as required_args, + coerce_boolean as coerce_boolean, + coerce_integer as coerce_integer, + file_from_path as file_from_path, + parse_datetime as parse_datetime, + strip_not_given as strip_not_given, + deepcopy_minimal as deepcopy_minimal, + get_async_library as get_async_library, + maybe_coerce_float as maybe_coerce_float, + get_required_header as get_required_header, + maybe_coerce_boolean as maybe_coerce_boolean, + maybe_coerce_integer as maybe_coerce_integer, +) +from ._typing import ( + is_list_type as is_list_type, + is_union_type as is_union_type, + extract_type_arg as extract_type_arg, + is_iterable_type as is_iterable_type, + is_required_type as is_required_type, + is_annotated_type as is_annotated_type, + strip_annotated_type as strip_annotated_type, + extract_type_var_from_base as extract_type_var_from_base, +) +from ._streams import ( + consume_sync_iterator as consume_sync_iterator, + consume_async_iterator as consume_async_iterator, +) +from ._transform import ( + PropertyInfo as PropertyInfo, + transform as transform, + async_transform as async_transform, + maybe_transform as maybe_transform, + async_maybe_transform as async_maybe_transform, +) diff --git a/portkey_ai/_vendor/openai/_utils/_logs.py b/portkey_ai/_vendor/openai/_utils/_logs.py new file mode 100644 index 00000000..e5113fd8 --- /dev/null +++ b/portkey_ai/_vendor/openai/_utils/_logs.py @@ -0,0 +1,25 @@ +import os +import logging + +logger: logging.Logger = logging.getLogger("openai") +httpx_logger: logging.Logger = logging.getLogger("httpx") + + +def _basic_config() -> None: + # e.g. 
[2023-10-05 14:12:26 - openai._base_client:818 - DEBUG] HTTP Request: POST http://127.0.0.1:4010/foo/bar "200 OK" + logging.basicConfig( + format="[%(asctime)s - %(name)s:%(lineno)d - %(levelname)s] %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + + +def setup_logging() -> None: + env = os.environ.get("OPENAI_LOG") + if env == "debug": + _basic_config() + logger.setLevel(logging.DEBUG) + httpx_logger.setLevel(logging.DEBUG) + elif env == "info": + _basic_config() + logger.setLevel(logging.INFO) + httpx_logger.setLevel(logging.INFO) diff --git a/portkey_ai/_vendor/openai/_utils/_proxy.py b/portkey_ai/_vendor/openai/_utils/_proxy.py new file mode 100644 index 00000000..c46a62a6 --- /dev/null +++ b/portkey_ai/_vendor/openai/_utils/_proxy.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Generic, TypeVar, Iterable, cast +from typing_extensions import override + +T = TypeVar("T") + + +class LazyProxy(Generic[T], ABC): + """Implements data methods to pretend that an instance is another instance. + + This includes forwarding attribute access and other methods. + """ + + # Note: we have to special case proxies that themselves return proxies + # to support using a proxy as a catch-all for any random access, e.g. `proxy.foo.bar.baz` + + def __getattr__(self, attr: str) -> object: + proxied = self.__get_proxied__() + if isinstance(proxied, LazyProxy): + return proxied # pyright: ignore + return getattr(proxied, attr) + + @override + def __repr__(self) -> str: + proxied = self.__get_proxied__() + if isinstance(proxied, LazyProxy): + return proxied.__class__.__name__ + return repr(self.__get_proxied__()) + + @override + def __str__(self) -> str: + proxied = self.__get_proxied__() + if isinstance(proxied, LazyProxy): + return proxied.__class__.__name__ + return str(proxied) + + @override + def __dir__(self) -> Iterable[str]: + proxied = self.__get_proxied__() + if isinstance(proxied, LazyProxy): + return [] + return proxied.__dir__() + + @property # type: ignore + @override + def __class__(self) -> type: # pyright: ignore + proxied = self.__get_proxied__() + if issubclass(type(proxied), LazyProxy): + return type(proxied) + return proxied.__class__ + + def __get_proxied__(self) -> T: + return self.__load__() + + def __as_proxied__(self) -> T: + """Helper method that returns the current proxy, typed as the loaded object""" + return cast(T, self) + + @abstractmethod + def __load__(self) -> T: + ... diff --git a/portkey_ai/_vendor/openai/_utils/_streams.py b/portkey_ai/_vendor/openai/_utils/_streams.py new file mode 100644 index 00000000..f4a0208f --- /dev/null +++ b/portkey_ai/_vendor/openai/_utils/_streams.py @@ -0,0 +1,12 @@ +from typing import Any +from typing_extensions import Iterator, AsyncIterator + + +def consume_sync_iterator(iterator: Iterator[Any]) -> None: + for _ in iterator: + ... + + +async def consume_async_iterator(iterator: AsyncIterator[Any]) -> None: + async for _ in iterator: + ... 
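
The `LazyProxy` helper defined in `_utils/_proxy.py` above is what backs the module-level accessors in `_module_client.py`: each `*Proxy` subclass implements `__load__` to fetch the real resource from the lazily constructed default client, and `__as_proxied__()` exposes the proxy under the target type. The following is a minimal illustrative sketch of that pattern, not part of the vendored sources, and it assumes the vendored import path `portkey_ai._vendor.openai._utils`:

```py
from __future__ import annotations

from typing_extensions import override

# Assumed import path for the vendored helper shown in _utils/_proxy.py above.
from portkey_ai._vendor.openai._utils import LazyProxy


class Settings:
    """Stand-in for an object that is expensive to construct."""

    def __init__(self) -> None:
        self.api_base = "https://api.example.com"


class SettingsProxy(LazyProxy[Settings]):
    @override
    def __load__(self) -> Settings:
        # Invoked whenever an attribute is accessed on the proxy; real
        # implementations typically memoize the loaded object elsewhere
        # (as `_load_client()` does for the default client).
        return Settings()


# Typed as `Settings`, but construction is deferred until first attribute access.
settings: Settings = SettingsProxy().__as_proxied__()
print(settings.api_base)
```

This deferral is why `chat`, `completions`, `embeddings`, and the other module-level attributes in `_module_client.py` can be imported without instantiating a client at import time.
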
diff --git a/portkey_ai/_vendor/openai/_utils/_sync.py b/portkey_ai/_vendor/openai/_utils/_sync.py new file mode 100644 index 00000000..a0c2ee80 --- /dev/null +++ b/portkey_ai/_vendor/openai/_utils/_sync.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +import functools +from typing import TypeVar, Callable, Awaitable +from typing_extensions import ParamSpec + +import anyio +import anyio.to_thread + +T_Retval = TypeVar("T_Retval") +T_ParamSpec = ParamSpec("T_ParamSpec") + + +# copied from `asyncer`, https://github.com/tiangolo/asyncer +def asyncify( + function: Callable[T_ParamSpec, T_Retval], + *, + cancellable: bool = False, + limiter: anyio.CapacityLimiter | None = None, +) -> Callable[T_ParamSpec, Awaitable[T_Retval]]: + """ + Take a blocking function and create an async one that receives the same + positional and keyword arguments, and that when called, calls the original function + in a worker thread using `anyio.to_thread.run_sync()`. Internally, + `asyncer.asyncify()` uses the same `anyio.to_thread.run_sync()`, but it supports + keyword arguments additional to positional arguments and it adds better support for + autocompletion and inline errors for the arguments of the function called and the + return value. + + If the `cancellable` option is enabled and the task waiting for its completion is + cancelled, the thread will still run its course but its return value (or any raised + exception) will be ignored. + + Use it like this: + + ```Python + def do_work(arg1, arg2, kwarg1="", kwarg2="") -> str: + # Do work + return "Some result" + + + result = await to_thread.asyncify(do_work)("spam", "ham", kwarg1="a", kwarg2="b") + print(result) + ``` + + ## Arguments + + `function`: a blocking regular callable (e.g. a function) + `cancellable`: `True` to allow cancellation of the operation + `limiter`: capacity limiter to use to limit the total amount of threads running + (if omitted, the default limiter is used) + + ## Return + + An async function that takes the same positional and keyword arguments as the + original one, that when called runs the same original function in a thread worker + and returns the result. 
+ """ + + async def wrapper( + *args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs + ) -> T_Retval: + partial_f = functools.partial(function, *args, **kwargs) + return await anyio.to_thread.run_sync( + partial_f, cancellable=cancellable, limiter=limiter + ) + + return wrapper diff --git a/portkey_ai/_vendor/openai/_utils/_transform.py b/portkey_ai/_vendor/openai/_utils/_transform.py new file mode 100644 index 00000000..b61d0477 --- /dev/null +++ b/portkey_ai/_vendor/openai/_utils/_transform.py @@ -0,0 +1,416 @@ +from __future__ import annotations + +import io +import base64 +import pathlib +from typing import Any, Mapping, TypeVar, cast +from datetime import date, datetime +from typing_extensions import Literal, get_args, override, get_type_hints + +import anyio +import pydantic + +from ._utils import ( + is_list, + is_mapping, + is_iterable, +) +from .._files import is_base64_file_input +from ._typing import ( + is_list_type, + is_union_type, + extract_type_arg, + is_iterable_type, + is_required_type, + is_annotated_type, + strip_annotated_type, +) +from .._compat import model_dump, is_typeddict + +_T = TypeVar("_T") + + +# TODO: support for drilling globals() and locals() +# TODO: ensure works correctly with forward references in all cases + + +PropertyFormat = Literal["iso8601", "base64", "custom"] + + +class PropertyInfo: + """Metadata class to be used in Annotated types to provide information about a given type. + + For example: + + class MyParams(TypedDict): + account_holder_name: Annotated[str, PropertyInfo(alias='accountHolderName')] + + This means that {'account_holder_name': 'Robert'} will be transformed to {'accountHolderName': 'Robert'} before being sent to the API. + """ + + alias: str | None + format: PropertyFormat | None + format_template: str | None + discriminator: str | None + + def __init__( + self, + *, + alias: str | None = None, + format: PropertyFormat | None = None, + format_template: str | None = None, + discriminator: str | None = None, + ) -> None: + self.alias = alias + self.format = format + self.format_template = format_template + self.discriminator = discriminator + + @override + def __repr__(self) -> str: + return f"{self.__class__.__name__}(alias='{self.alias}', format={self.format}, format_template='{self.format_template}', discriminator='{self.discriminator}')" + + +def maybe_transform( + data: object, + expected_type: object, +) -> Any | None: + """Wrapper over `transform()` that allows `None` to be passed. + + See `transform()` for more details. + """ + if data is None: + return None + return transform(data, expected_type) + + +# Wrapper over _transform_recursive providing fake types +def transform( + data: _T, + expected_type: object, +) -> _T: + """Transform dictionaries based off of type information from the given type, for example: + + ```py + class Params(TypedDict, total=False): + card_id: Required[Annotated[str, PropertyInfo(alias="cardID")]] + + + transformed = transform({"card_id": ""}, Params) + # {'cardID': ''} + ``` + + Any keys / data that does not have type information given will be included as is. + + It should be noted that the transformations that this function does are not represented in the type system. + """ + transformed = _transform_recursive(data, annotation=cast(type, expected_type)) + return cast(_T, transformed) + + +def _get_annotated_type(type_: type) -> type | None: + """If the given type is an `Annotated` type then it is returned, if not `None` is returned. + + This also unwraps the type when applicable, e.g. 
`Required[Annotated[T, ...]]` + """ + if is_required_type(type_): + # Unwrap `Required[Annotated[T, ...]]` to `Annotated[T, ...]` + type_ = get_args(type_)[0] + + if is_annotated_type(type_): + return type_ + + return None + + +def _maybe_transform_key(key: str, type_: type) -> str: + """Transform the given `data` based on the annotations provided in `type_`. + + Note: this function only looks at `Annotated` types that contain `PropertInfo` metadata. + """ + annotated_type = _get_annotated_type(type_) + if annotated_type is None: + # no `Annotated` definition for this type, no transformation needed + return key + + # ignore the first argument as it is the actual type + annotations = get_args(annotated_type)[1:] + for annotation in annotations: + if isinstance(annotation, PropertyInfo) and annotation.alias is not None: + return annotation.alias + + return key + + +def _transform_recursive( + data: object, + *, + annotation: type, + inner_type: type | None = None, +) -> object: + """Transform the given data against the expected type. + + Args: + annotation: The direct type annotation given to the particular piece of data. + This may or may not be wrapped in metadata types, e.g. `Required[T]`, `Annotated[T, ...]` etc + + inner_type: If applicable, this is the "inside" type. This is useful in certain cases where the outside type + is a container type such as `List[T]`. In that case `inner_type` should be set to `T` so that each entry in + the list can be transformed using the metadata from the container type. + + Defaults to the same value as the `annotation` argument. + """ + if inner_type is None: + inner_type = annotation + + stripped_type = strip_annotated_type(inner_type) + if is_typeddict(stripped_type) and is_mapping(data): + return _transform_typeddict(data, stripped_type) + + if ( + # List[T] + (is_list_type(stripped_type) and is_list(data)) + # Iterable[T] + or ( + is_iterable_type(stripped_type) + and is_iterable(data) + and not isinstance(data, str) + ) + ): + inner_type = extract_type_arg(stripped_type, 0) + return [ + _transform_recursive(d, annotation=annotation, inner_type=inner_type) + for d in data + ] + + if is_union_type(stripped_type): + # For union types we run the transformation against all subtypes to ensure that everything is transformed. + # + # TODO: there may be edge cases where the same normalized field name will transform to two different names + # in different subtypes. 
+ for subtype in get_args(stripped_type): + data = _transform_recursive(data, annotation=annotation, inner_type=subtype) + return data + + if isinstance(data, pydantic.BaseModel): + return model_dump(data, exclude_unset=True) + + annotated_type = _get_annotated_type(annotation) + if annotated_type is None: + return data + + # ignore the first argument as it is the actual type + annotations = get_args(annotated_type)[1:] + for annotation in annotations: + if isinstance(annotation, PropertyInfo) and annotation.format is not None: + return _format_data(data, annotation.format, annotation.format_template) + + return data + + +def _format_data( + data: object, format_: PropertyFormat, format_template: str | None +) -> object: + if isinstance(data, (date, datetime)): + if format_ == "iso8601": + return data.isoformat() + + if format_ == "custom" and format_template is not None: + return data.strftime(format_template) + + if format_ == "base64" and is_base64_file_input(data): + binary: str | bytes | None = None + + if isinstance(data, pathlib.Path): + binary = data.read_bytes() + elif isinstance(data, io.IOBase): + binary = data.read() + + if isinstance(binary, str): # type: ignore[unreachable] + binary = binary.encode() + + if not isinstance(binary, bytes): + raise RuntimeError( + f"Could not read bytes from {data}; Received {type(binary)}" + ) + + return base64.b64encode(binary).decode("ascii") + + return data + + +def _transform_typeddict( + data: Mapping[str, object], + expected_type: type, +) -> Mapping[str, object]: + result: dict[str, object] = {} + annotations = get_type_hints(expected_type, include_extras=True) + for key, value in data.items(): + type_ = annotations.get(key) + if type_ is None: + # we do not have a type annotation for this field, leave it as is + result[key] = value + else: + result[_maybe_transform_key(key, type_)] = _transform_recursive( + value, annotation=type_ + ) + return result + + +async def async_maybe_transform( + data: object, + expected_type: object, +) -> Any | None: + """Wrapper over `async_transform()` that allows `None` to be passed. + + See `async_transform()` for more details. + """ + if data is None: + return None + return await async_transform(data, expected_type) + + +async def async_transform( + data: _T, + expected_type: object, +) -> _T: + """Transform dictionaries based off of type information from the given type, for example: + + ```py + class Params(TypedDict, total=False): + card_id: Required[Annotated[str, PropertyInfo(alias="cardID")]] + + + transformed = transform({"card_id": ""}, Params) + # {'cardID': ''} + ``` + + Any keys / data that does not have type information given will be included as is. + + It should be noted that the transformations that this function does are not represented in the type system. + """ + transformed = await _async_transform_recursive( + data, annotation=cast(type, expected_type) + ) + return cast(_T, transformed) + + +async def _async_transform_recursive( + data: object, + *, + annotation: type, + inner_type: type | None = None, +) -> object: + """Transform the given data against the expected type. + + Args: + annotation: The direct type annotation given to the particular piece of data. + This may or may not be wrapped in metadata types, e.g. `Required[T]`, `Annotated[T, ...]` etc + + inner_type: If applicable, this is the "inside" type. This is useful in certain cases where the outside type + is a container type such as `List[T]`. 
In that case `inner_type` should be set to `T` so that each entry in + the list can be transformed using the metadata from the container type. + + Defaults to the same value as the `annotation` argument. + """ + if inner_type is None: + inner_type = annotation + + stripped_type = strip_annotated_type(inner_type) + if is_typeddict(stripped_type) and is_mapping(data): + return await _async_transform_typeddict(data, stripped_type) + + if ( + # List[T] + (is_list_type(stripped_type) and is_list(data)) + # Iterable[T] + or ( + is_iterable_type(stripped_type) + and is_iterable(data) + and not isinstance(data, str) + ) + ): + inner_type = extract_type_arg(stripped_type, 0) + return [ + await _async_transform_recursive( + d, annotation=annotation, inner_type=inner_type + ) + for d in data + ] + + if is_union_type(stripped_type): + # For union types we run the transformation against all subtypes to ensure that everything is transformed. + # + # TODO: there may be edge cases where the same normalized field name will transform to two different names + # in different subtypes. + for subtype in get_args(stripped_type): + data = await _async_transform_recursive( + data, annotation=annotation, inner_type=subtype + ) + return data + + if isinstance(data, pydantic.BaseModel): + return model_dump(data, exclude_unset=True) + + annotated_type = _get_annotated_type(annotation) + if annotated_type is None: + return data + + # ignore the first argument as it is the actual type + annotations = get_args(annotated_type)[1:] + for annotation in annotations: + if isinstance(annotation, PropertyInfo) and annotation.format is not None: + return await _async_format_data( + data, annotation.format, annotation.format_template + ) + + return data + + +async def _async_format_data( + data: object, format_: PropertyFormat, format_template: str | None +) -> object: + if isinstance(data, (date, datetime)): + if format_ == "iso8601": + return data.isoformat() + + if format_ == "custom" and format_template is not None: + return data.strftime(format_template) + + if format_ == "base64" and is_base64_file_input(data): + binary: str | bytes | None = None + + if isinstance(data, pathlib.Path): + binary = await anyio.Path(data).read_bytes() + elif isinstance(data, io.IOBase): + binary = data.read() + + if isinstance(binary, str): # type: ignore[unreachable] + binary = binary.encode() + + if not isinstance(binary, bytes): + raise RuntimeError( + f"Could not read bytes from {data}; Received {type(binary)}" + ) + + return base64.b64encode(binary).decode("ascii") + + return data + + +async def _async_transform_typeddict( + data: Mapping[str, object], + expected_type: type, +) -> Mapping[str, object]: + result: dict[str, object] = {} + annotations = get_type_hints(expected_type, include_extras=True) + for key, value in data.items(): + type_ = annotations.get(key) + if type_ is None: + # we do not have a type annotation for this field, leave it as is + result[key] = value + else: + result[_maybe_transform_key(key, type_)] = await _async_transform_recursive( + value, annotation=type_ + ) + return result diff --git a/portkey_ai/_vendor/openai/_utils/_typing.py b/portkey_ai/_vendor/openai/_utils/_typing.py new file mode 100644 index 00000000..312c8ded --- /dev/null +++ b/portkey_ai/_vendor/openai/_utils/_typing.py @@ -0,0 +1,125 @@ +from __future__ import annotations + +from typing import Any, TypeVar, Iterable, cast +from collections import abc as _c_abc +from typing_extensions import Required, Annotated, get_args, get_origin + +from 
.._types import InheritsGeneric +from .._compat import is_union as _is_union + + +def is_annotated_type(typ: type) -> bool: + return get_origin(typ) == Annotated + + +def is_list_type(typ: type) -> bool: + return (get_origin(typ) or typ) == list + + +def is_iterable_type(typ: type) -> bool: + """If the given type is `typing.Iterable[T]`""" + origin = get_origin(typ) or typ + return origin == Iterable or origin == _c_abc.Iterable + + +def is_union_type(typ: type) -> bool: + return _is_union(get_origin(typ)) + + +def is_required_type(typ: type) -> bool: + return get_origin(typ) == Required + + +def is_typevar(typ: type) -> bool: + # type ignore is required because type checkers + # think this expression will always return False + return type(typ) == TypeVar # type: ignore + + +# Extracts T from Annotated[T, ...] or from Required[Annotated[T, ...]] +def strip_annotated_type(typ: type) -> type: + if is_required_type(typ) or is_annotated_type(typ): + return strip_annotated_type(cast(type, get_args(typ)[0])) + + return typ + + +def extract_type_arg(typ: type, index: int) -> type: + args = get_args(typ) + try: + return cast(type, args[index]) + except IndexError as err: + raise RuntimeError( + f"Expected type {typ} to have a type argument at index {index} but it did not" + ) from err + + +def extract_type_var_from_base( + typ: type, + *, + generic_bases: tuple[type, ...], + index: int, + failure_message: str | None = None, +) -> type: + """Given a type like `Foo[T]`, returns the generic type variable `T`. + + This also handles the case where a concrete subclass is given, e.g. + ```py + class MyResponse(Foo[bytes]): + ... + + extract_type_var(MyResponse, bases=(Foo,), index=0) -> bytes + ``` + + And where a generic subclass is given: + ```py + _T = TypeVar('_T') + class MyResponse(Foo[_T]): + ... + + extract_type_var(MyResponse[bytes], bases=(Foo,), index=0) -> bytes + ``` + """ + cls = cast(object, get_origin(typ) or typ) + if cls in generic_bases: + # we're given the class directly + return extract_type_arg(typ, index) + + # if a subclass is given + # --- + # this is needed as __orig_bases__ is not present in the typeshed stubs + # because it is intended to be for internal use only, however there does + # not seem to be a way to resolve generic TypeVars for inherited subclasses + # without using it. + if isinstance(cls, InheritsGeneric): + target_base_class: Any | None = None + for base in cls.__orig_bases__: + if base.__origin__ in generic_bases: + target_base_class = base + break + + if target_base_class is None: + raise RuntimeError( + "Could not find the generic base class;\n" + "This should never happen;\n" + f"Does {cls} inherit from one of {generic_bases} ?" + ) + + extracted = extract_type_arg(target_base_class, index) + if is_typevar(extracted): + # If the extracted type argument is itself a type variable + # then that means the subclass itself is generic, so we have + # to resolve the type argument from the class itself, not + # the base class. + # + # Note: if there is more than 1 type argument, the subclass could + # change the ordering of the type arguments, this is not currently + # supported. 
+ return extract_type_arg(typ, index) + + return extracted + + raise RuntimeError( + failure_message + or f"Could not resolve inner type variable at index {index} for {typ}" + ) diff --git a/portkey_ai/_vendor/openai/_utils/_utils.py b/portkey_ai/_vendor/openai/_utils/_utils.py new file mode 100644 index 00000000..32cdbc4d --- /dev/null +++ b/portkey_ai/_vendor/openai/_utils/_utils.py @@ -0,0 +1,414 @@ +from __future__ import annotations + +import os +import re +import inspect +import functools +from typing import ( + Any, + Tuple, + Mapping, + TypeVar, + Callable, + Iterable, + Sequence, + cast, + overload, +) +from pathlib import Path +from typing_extensions import TypeGuard + +import sniffio + +from .._types import Headers, NotGiven, FileTypes, NotGivenOr, HeadersLike +from .._compat import parse_date as parse_date, parse_datetime as parse_datetime + +_T = TypeVar("_T") +_TupleT = TypeVar("_TupleT", bound=Tuple[object, ...]) +_MappingT = TypeVar("_MappingT", bound=Mapping[str, object]) +_SequenceT = TypeVar("_SequenceT", bound=Sequence[object]) +CallableT = TypeVar("CallableT", bound=Callable[..., Any]) + + +def flatten(t: Iterable[Iterable[_T]]) -> list[_T]: + return [item for sublist in t for item in sublist] + + +def extract_files( + # TODO: this needs to take Dict but variance issues..... + # create protocol type ? + query: Mapping[str, object], + *, + paths: Sequence[Sequence[str]], +) -> list[tuple[str, FileTypes]]: + """Recursively extract files from the given dictionary based on specified paths. + + A path may look like this ['foo', 'files', '', 'data']. + + Note: this mutates the given dictionary. + """ + files: list[tuple[str, FileTypes]] = [] + for path in paths: + files.extend(_extract_items(query, path, index=0, flattened_key=None)) + return files + + +def _extract_items( + obj: object, + path: Sequence[str], + *, + index: int, + flattened_key: str | None, +) -> list[tuple[str, FileTypes]]: + try: + key = path[index] + except IndexError: + if isinstance(obj, NotGiven): + # no value was provided - we can safely ignore + return [] + + # cyclical import + from .._files import assert_is_file_content + + # We have exhausted the path, return the entry we found. + assert_is_file_content(obj, key=flattened_key) + assert flattened_key is not None + return [(flattened_key, cast(FileTypes, obj))] + + index += 1 + if is_dict(obj): + try: + # We are at the last entry in the path so we must remove the field + if (len(path)) == index: + item = obj.pop(key) + else: + item = obj[key] + except KeyError: + # Key was not present in the dictionary, this is not indicative of an error + # as the given path may not point to a required field. We also do not want + # to enforce required fields as the API may differ from the spec in some cases. + return [] + if flattened_key is None: + flattened_key = key + else: + flattened_key += f"[{key}]" + return _extract_items( + item, + path, + index=index, + flattened_key=flattened_key, + ) + elif is_list(obj): + if key != "": + return [] + + return flatten( + [ + _extract_items( + item, + path, + index=index, + flattened_key=flattened_key + "[]" + if flattened_key is not None + else "[]", + ) + for item in obj + ] + ) + + # Something unexpected was passed, just ignore it. + return [] + + +def is_given(obj: NotGivenOr[_T]) -> TypeGuard[_T]: + return not isinstance(obj, NotGiven) + + +# Type safe methods for narrowing types with TypeVars. 
+# The default narrowing for isinstance(obj, dict) is dict[unknown, unknown], +# however this cause Pyright to rightfully report errors. As we know we don't +# care about the contained types we can safely use `object` in it's place. +# +# There are two separate functions defined, `is_*` and `is_*_t` for different use cases. +# `is_*` is for when you're dealing with an unknown input +# `is_*_t` is for when you're narrowing a known union type to a specific subset + + +def is_tuple(obj: object) -> TypeGuard[tuple[object, ...]]: + return isinstance(obj, tuple) + + +def is_tuple_t(obj: _TupleT | object) -> TypeGuard[_TupleT]: + return isinstance(obj, tuple) + + +def is_sequence(obj: object) -> TypeGuard[Sequence[object]]: + return isinstance(obj, Sequence) + + +def is_sequence_t(obj: _SequenceT | object) -> TypeGuard[_SequenceT]: + return isinstance(obj, Sequence) + + +def is_mapping(obj: object) -> TypeGuard[Mapping[str, object]]: + return isinstance(obj, Mapping) + + +def is_mapping_t(obj: _MappingT | object) -> TypeGuard[_MappingT]: + return isinstance(obj, Mapping) + + +def is_dict(obj: object) -> TypeGuard[dict[object, object]]: + return isinstance(obj, dict) + + +def is_list(obj: object) -> TypeGuard[list[object]]: + return isinstance(obj, list) + + +def is_iterable(obj: object) -> TypeGuard[Iterable[object]]: + return isinstance(obj, Iterable) + + +def deepcopy_minimal(item: _T) -> _T: + """Minimal reimplementation of copy.deepcopy() that will only copy certain object types: + + - mappings, e.g. `dict` + - list + + This is done for performance reasons. + """ + if is_mapping(item): + return cast(_T, {k: deepcopy_minimal(v) for k, v in item.items()}) + if is_list(item): + return cast(_T, [deepcopy_minimal(entry) for entry in item]) + return item + + +# copied from https://github.com/Rapptz/RoboDanny +def human_join(seq: Sequence[str], *, delim: str = ", ", final: str = "or") -> str: + size = len(seq) + if size == 0: + return "" + + if size == 1: + return seq[0] + + if size == 2: + return f"{seq[0]} {final} {seq[1]}" + + return delim.join(seq[:-1]) + f" {final} {seq[-1]}" + + +def quote(string: str) -> str: + """Add single quotation marks around the given string. Does *not* do any escaping.""" + return f"'{string}'" + + +def required_args(*variants: Sequence[str]) -> Callable[[CallableT], CallableT]: + """Decorator to enforce a given set of arguments or variants of arguments are passed to the decorated function. + + Useful for enforcing runtime validation of overloaded functions. + + Example usage: + ```py + @overload + def foo(*, a: str) -> str: + ... + + + @overload + def foo(*, b: bool) -> str: + ... + + + # This enforces the same constraints that a static type checker would + # i.e. that either a or b must be passed to the function + @required_args(["a"], ["b"]) + def foo(*, a: str | None = None, b: bool | None = None) -> str: + ... 
+ ``` + """ + + def inner(func: CallableT) -> CallableT: + params = inspect.signature(func).parameters + positional = [ + name + for name, param in params.items() + if param.kind + in { + param.POSITIONAL_ONLY, + param.POSITIONAL_OR_KEYWORD, + } + ] + + @functools.wraps(func) + def wrapper(*args: object, **kwargs: object) -> object: + given_params: set[str] = set() + for i, _ in enumerate(args): + try: + given_params.add(positional[i]) + except IndexError: + raise TypeError( + f"{func.__name__}() takes {len(positional)} argument(s) but {len(args)} were given" + ) from None + + for key in kwargs.keys(): + given_params.add(key) + + for variant in variants: + matches = all((param in given_params for param in variant)) + if matches: + break + else: # no break + if len(variants) > 1: + variations = human_join( + [ + "(" + + human_join([quote(arg) for arg in variant], final="and") + + ")" + for variant in variants + ] + ) + msg = f"Missing required arguments; Expected either {variations} arguments to be given" + else: + assert len(variants) > 0 + + # TODO: this error message is not deterministic + missing = list(set(variants[0]) - given_params) + if len(missing) > 1: + msg = f"Missing required arguments: {human_join([quote(arg) for arg in missing])}" + else: + msg = f"Missing required argument: {quote(missing[0])}" + raise TypeError(msg) + return func(*args, **kwargs) + + return wrapper # type: ignore + + return inner + + +_K = TypeVar("_K") +_V = TypeVar("_V") + + +@overload +def strip_not_given(obj: None) -> None: + ... + + +@overload +def strip_not_given(obj: Mapping[_K, _V | NotGiven]) -> dict[_K, _V]: + ... + + +@overload +def strip_not_given(obj: object) -> object: + ... + + +def strip_not_given(obj: object | None) -> object: + """Remove all top-level keys where their values are instances of `NotGiven`""" + if obj is None: + return None + + if not is_mapping(obj): + return obj + + return {key: value for key, value in obj.items() if not isinstance(value, NotGiven)} + + +def coerce_integer(val: str) -> int: + return int(val, base=10) + + +def coerce_float(val: str) -> float: + return float(val) + + +def coerce_boolean(val: str) -> bool: + return val == "true" or val == "1" or val == "on" + + +def maybe_coerce_integer(val: str | None) -> int | None: + if val is None: + return None + return coerce_integer(val) + + +def maybe_coerce_float(val: str | None) -> float | None: + if val is None: + return None + return coerce_float(val) + + +def maybe_coerce_boolean(val: str | None) -> bool | None: + if val is None: + return None + return coerce_boolean(val) + + +def removeprefix(string: str, prefix: str) -> str: + """Remove a prefix from a string. + + Backport of `str.removeprefix` for Python < 3.9 + """ + if string.startswith(prefix): + return string[len(prefix) :] + return string + + +def removesuffix(string: str, suffix: str) -> str: + """Remove a suffix from a string. 
+ + Backport of `str.removesuffix` for Python < 3.9 + """ + if string.endswith(suffix): + return string[: -len(suffix)] + return string + + +def file_from_path(path: str) -> FileTypes: + contents = Path(path).read_bytes() + file_name = os.path.basename(path) + return (file_name, contents) + + +def get_required_header(headers: HeadersLike, header: str) -> str: + lower_header = header.lower() + if isinstance(headers, Mapping): + headers = cast(Headers, headers) + for k, v in headers.items(): + if k.lower() == lower_header and isinstance(v, str): + return v + + """ to deal with the case where the header looks like Stainless-Event-Id """ + intercaps_header = re.sub( + r"([^\w])(\w)", + lambda pat: pat.group(1) + pat.group(2).upper(), + header.capitalize(), + ) + + for normalized_header in [header, lower_header, header.upper(), intercaps_header]: + value = headers.get(normalized_header) + if value: + return value + + raise ValueError(f"Could not find {header} header") + + +def get_async_library() -> str: + try: + return sniffio.current_async_library() + except Exception: + return "false" + + +def lru_cache(*, maxsize: int | None = 128) -> Callable[[CallableT], CallableT]: + """A version of functools.lru_cache that retains the type signature + for the wrapped function arguments. + """ + wrapper = functools.lru_cache( # noqa: TID251 + maxsize=maxsize, + ) + return cast(Any, wrapper) # type: ignore[no-any-return] diff --git a/portkey_ai/_vendor/openai/_version.py b/portkey_ai/_vendor/openai/_version.py new file mode 100644 index 00000000..49495e5c --- /dev/null +++ b/portkey_ai/_vendor/openai/_version.py @@ -0,0 +1,4 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +__title__ = "openai" +__version__ = "1.26.0" # x-release-please-version diff --git a/portkey_ai/_vendor/openai/cli/__init__.py b/portkey_ai/_vendor/openai/cli/__init__.py new file mode 100644 index 00000000..d453d5e1 --- /dev/null +++ b/portkey_ai/_vendor/openai/cli/__init__.py @@ -0,0 +1 @@ +from ._cli import main as main diff --git a/portkey_ai/_vendor/openai/cli/_api/__init__.py b/portkey_ai/_vendor/openai/cli/_api/__init__.py new file mode 100644 index 00000000..56a0260a --- /dev/null +++ b/portkey_ai/_vendor/openai/cli/_api/__init__.py @@ -0,0 +1 @@ +from ._main import register_commands as register_commands diff --git a/portkey_ai/_vendor/openai/cli/_api/_main.py b/portkey_ai/_vendor/openai/cli/_api/_main.py new file mode 100644 index 00000000..fe5a5e6f --- /dev/null +++ b/portkey_ai/_vendor/openai/cli/_api/_main.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from argparse import ArgumentParser + +from . 
import chat, audio, files, image, models, completions + + +def register_commands(parser: ArgumentParser) -> None: + subparsers = parser.add_subparsers(help="All API subcommands") + + chat.register(subparsers) + image.register(subparsers) + audio.register(subparsers) + files.register(subparsers) + models.register(subparsers) + completions.register(subparsers) diff --git a/portkey_ai/_vendor/openai/cli/_api/audio.py b/portkey_ai/_vendor/openai/cli/_api/audio.py new file mode 100644 index 00000000..90d21b99 --- /dev/null +++ b/portkey_ai/_vendor/openai/cli/_api/audio.py @@ -0,0 +1,94 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Optional, cast +from argparse import ArgumentParser + +from .._utils import get_client, print_model +from ..._types import NOT_GIVEN +from .._models import BaseModel +from .._progress import BufferReader + +if TYPE_CHECKING: + from argparse import _SubParsersAction + + +def register(subparser: _SubParsersAction[ArgumentParser]) -> None: + # transcriptions + sub = subparser.add_parser("audio.transcriptions.create") + + # Required + sub.add_argument("-m", "--model", type=str, default="whisper-1") + sub.add_argument("-f", "--file", type=str, required=True) + # Optional + sub.add_argument("--response-format", type=str) + sub.add_argument("--language", type=str) + sub.add_argument("-t", "--temperature", type=float) + sub.add_argument("--prompt", type=str) + sub.set_defaults(func=CLIAudio.transcribe, args_model=CLITranscribeArgs) + + # translations + sub = subparser.add_parser("audio.translations.create") + + # Required + sub.add_argument("-f", "--file", type=str, required=True) + # Optional + sub.add_argument("-m", "--model", type=str, default="whisper-1") + sub.add_argument("--response-format", type=str) + # TODO: doesn't seem to be supported by the API + # sub.add_argument("--language", type=str) + sub.add_argument("-t", "--temperature", type=float) + sub.add_argument("--prompt", type=str) + sub.set_defaults(func=CLIAudio.translate, args_model=CLITranslationArgs) + + +class CLITranscribeArgs(BaseModel): + model: str + file: str + response_format: Optional[str] = None + language: Optional[str] = None + temperature: Optional[float] = None + prompt: Optional[str] = None + + +class CLITranslationArgs(BaseModel): + model: str + file: str + response_format: Optional[str] = None + language: Optional[str] = None + temperature: Optional[float] = None + prompt: Optional[str] = None + + +class CLIAudio: + @staticmethod + def transcribe(args: CLITranscribeArgs) -> None: + with open(args.file, "rb") as file_reader: + buffer_reader = BufferReader(file_reader.read(), desc="Upload progress") + + model = get_client().audio.transcriptions.create( + file=(args.file, buffer_reader), + model=args.model, + language=args.language or NOT_GIVEN, + temperature=args.temperature or NOT_GIVEN, + prompt=args.prompt or NOT_GIVEN, + # casts required because the API is typed for enums + # but we don't want to validate that here for forwards-compat + response_format=cast(Any, args.response_format), + ) + print_model(model) + + @staticmethod + def translate(args: CLITranslationArgs) -> None: + with open(args.file, "rb") as file_reader: + buffer_reader = BufferReader(file_reader.read(), desc="Upload progress") + + model = get_client().audio.translations.create( + file=(args.file, buffer_reader), + model=args.model, + temperature=args.temperature or NOT_GIVEN, + prompt=args.prompt or NOT_GIVEN, + # casts required because the API is typed for enums + # but we don't want to 
validate that here for forwards-compat + response_format=cast(Any, args.response_format), + ) + print_model(model) diff --git a/portkey_ai/_vendor/openai/cli/_api/chat/__init__.py b/portkey_ai/_vendor/openai/cli/_api/chat/__init__.py new file mode 100644 index 00000000..87d97163 --- /dev/null +++ b/portkey_ai/_vendor/openai/cli/_api/chat/__init__.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING +from argparse import ArgumentParser + +from . import completions + +if TYPE_CHECKING: + from argparse import _SubParsersAction + + +def register(subparser: _SubParsersAction[ArgumentParser]) -> None: + completions.register(subparser) diff --git a/portkey_ai/_vendor/openai/cli/_api/chat/completions.py b/portkey_ai/_vendor/openai/cli/_api/chat/completions.py new file mode 100644 index 00000000..45d2263a --- /dev/null +++ b/portkey_ai/_vendor/openai/cli/_api/chat/completions.py @@ -0,0 +1,176 @@ +from __future__ import annotations + +import sys +from typing import TYPE_CHECKING, List, Optional, cast +from argparse import ArgumentParser +from typing_extensions import Literal, NamedTuple + +from ..._utils import get_client +from ..._models import BaseModel +from ...._streaming import Stream +from ....types.chat import ( + ChatCompletionRole, + ChatCompletionChunk, + CompletionCreateParams, +) +from ....types.chat.completion_create_params import ( + CompletionCreateParamsStreaming, + CompletionCreateParamsNonStreaming, +) + +if TYPE_CHECKING: + from argparse import _SubParsersAction + + +def register(subparser: _SubParsersAction[ArgumentParser]) -> None: + sub = subparser.add_parser("chat.completions.create") + + sub._action_groups.pop() + req = sub.add_argument_group("required arguments") + opt = sub.add_argument_group("optional arguments") + + req.add_argument( + "-g", + "--message", + action="append", + nargs=2, + metavar=("ROLE", "CONTENT"), + help="A message in `{role} {content}` format. Use this argument multiple times to add multiple messages.", + required=True, + ) + req.add_argument( + "-m", + "--model", + help="The model to use.", + required=True, + ) + + opt.add_argument( + "-n", + "--n", + help="How many completions to generate for the conversation.", + type=int, + ) + opt.add_argument( + "-M", "--max-tokens", help="The maximum number of tokens to generate.", type=int + ) + opt.add_argument( + "-t", + "--temperature", + help="""What sampling temperature to use. Higher values means the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. + +Mutually exclusive with `top_p`.""", + type=float, + ) + opt.add_argument( + "-P", + "--top_p", + help="""An alternative to sampling with temperature, called nucleus sampling, where the considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10%% probability mass are considered. 
+ + Mutually exclusive with `temperature`.""", + type=float, + ) + opt.add_argument( + "--stop", + help="A stop sequence at which to stop generating tokens for the message.", + ) + opt.add_argument( + "--stream", help="Stream messages as they're ready.", action="store_true" + ) + sub.set_defaults( + func=CLIChatCompletion.create, args_model=CLIChatCompletionCreateArgs + ) + + +class CLIMessage(NamedTuple): + role: ChatCompletionRole + content: str + + +class CLIChatCompletionCreateArgs(BaseModel): + message: List[CLIMessage] + model: str + n: Optional[int] = None + max_tokens: Optional[int] = None + temperature: Optional[float] = None + top_p: Optional[float] = None + stop: Optional[str] = None + stream: bool = False + + +class CLIChatCompletion: + @staticmethod + def create(args: CLIChatCompletionCreateArgs) -> None: + params: CompletionCreateParams = { + "model": args.model, + "messages": [ + { + "role": cast(Literal["user"], message.role), + "content": message.content, + } + for message in args.message + ], + "n": args.n, + "temperature": args.temperature, + "top_p": args.top_p, + "stop": args.stop, + # type checkers are not good at inferring union types so we have to set stream afterwards + "stream": False, + } + if args.stream: + params["stream"] = args.stream # type: ignore + if args.max_tokens is not None: + params["max_tokens"] = args.max_tokens + + if args.stream: + return CLIChatCompletion._stream_create( + cast(CompletionCreateParamsStreaming, params) + ) + + return CLIChatCompletion._create( + cast(CompletionCreateParamsNonStreaming, params) + ) + + @staticmethod + def _create(params: CompletionCreateParamsNonStreaming) -> None: + completion = get_client().chat.completions.create(**params) + should_print_header = len(completion.choices) > 1 + for choice in completion.choices: + if should_print_header: + sys.stdout.write( + "===== Chat Completion {} =====\n".format(choice.index) + ) + + content = ( + choice.message.content if choice.message.content is not None else "None" + ) + sys.stdout.write(content) + + if should_print_header or not content.endswith("\n"): + sys.stdout.write("\n") + + sys.stdout.flush() + + @staticmethod + def _stream_create(params: CompletionCreateParamsStreaming) -> None: + # cast is required for mypy + stream = cast( # pyright: ignore[reportUnnecessaryCast] + Stream[ChatCompletionChunk], get_client().chat.completions.create(**params) + ) + for chunk in stream: + should_print_header = len(chunk.choices) > 1 + for choice in chunk.choices: + if should_print_header: + sys.stdout.write( + "===== Chat Completion {} =====\n".format(choice.index) + ) + + content = choice.delta.content or "" + sys.stdout.write(content) + + if should_print_header: + sys.stdout.write("\n") + + sys.stdout.flush() + + sys.stdout.write("\n") diff --git a/portkey_ai/_vendor/openai/cli/_api/completions.py b/portkey_ai/_vendor/openai/cli/_api/completions.py new file mode 100644 index 00000000..987e53ad --- /dev/null +++ b/portkey_ai/_vendor/openai/cli/_api/completions.py @@ -0,0 +1,185 @@ +from __future__ import annotations + +import sys +from typing import TYPE_CHECKING, Optional, cast +from argparse import ArgumentParser +from functools import partial + +from ....openai.types.completion import Completion + +from .._utils import get_client +from ..._types import NOT_GIVEN, NotGivenOr +from ..._utils import is_given +from .._errors import CLIError +from .._models import BaseModel +from ..._streaming import Stream + +if TYPE_CHECKING: + from argparse import _SubParsersAction + + +def 
register(subparser: _SubParsersAction[ArgumentParser]) -> None: + sub = subparser.add_parser("completions.create") + + # Required + sub.add_argument( + "-m", + "--model", + help="The model to use", + required=True, + ) + + # Optional + sub.add_argument("-p", "--prompt", help="An optional prompt to complete from") + sub.add_argument( + "--stream", help="Stream tokens as they're ready.", action="store_true" + ) + sub.add_argument( + "-M", "--max-tokens", help="The maximum number of tokens to generate", type=int + ) + sub.add_argument( + "-t", + "--temperature", + help="""What sampling temperature to use. Higher values means the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. + +Mutually exclusive with `top_p`.""", + type=float, + ) + sub.add_argument( + "-P", + "--top_p", + help="""An alternative to sampling with temperature, called nucleus sampling, where the considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10%% probability mass are considered. + + Mutually exclusive with `temperature`.""", + type=float, + ) + sub.add_argument( + "-n", + "--n", + help="How many sub-completions to generate for each prompt.", + type=int, + ) + sub.add_argument( + "--logprobs", + help="Include the log probabilities on the `logprobs` most likely tokens, as well the chosen tokens. So for example, if `logprobs` is 10, the API will return a list of the 10 most likely tokens. If `logprobs` is 0, only the chosen tokens will have logprobs returned.", + type=int, + ) + sub.add_argument( + "--best_of", + help="Generates `best_of` completions server-side and returns the 'best' (the one with the highest log probability per token). Results cannot be streamed.", + type=int, + ) + sub.add_argument( + "--echo", + help="Echo back the prompt in addition to the completion", + action="store_true", + ) + sub.add_argument( + "--frequency_penalty", + help="Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + type=float, + ) + sub.add_argument( + "--presence_penalty", + help="Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + type=float, + ) + sub.add_argument( + "--suffix", help="The suffix that comes after a completion of inserted text." + ) + sub.add_argument( + "--stop", help="A stop sequence at which to stop generating tokens." 
+ ) + sub.add_argument( + "--user", + help="A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.", + ) + # TODO: add support for logit_bias + sub.set_defaults(func=CLICompletions.create, args_model=CLICompletionCreateArgs) + + +class CLICompletionCreateArgs(BaseModel): + model: str + stream: bool = False + + prompt: Optional[str] = None + n: NotGivenOr[int] = NOT_GIVEN + stop: NotGivenOr[str] = NOT_GIVEN + user: NotGivenOr[str] = NOT_GIVEN + echo: NotGivenOr[bool] = NOT_GIVEN + suffix: NotGivenOr[str] = NOT_GIVEN + best_of: NotGivenOr[int] = NOT_GIVEN + top_p: NotGivenOr[float] = NOT_GIVEN + logprobs: NotGivenOr[int] = NOT_GIVEN + max_tokens: NotGivenOr[int] = NOT_GIVEN + temperature: NotGivenOr[float] = NOT_GIVEN + presence_penalty: NotGivenOr[float] = NOT_GIVEN + frequency_penalty: NotGivenOr[float] = NOT_GIVEN + + +class CLICompletions: + @staticmethod + def create(args: CLICompletionCreateArgs) -> None: + if is_given(args.n) and args.n > 1 and args.stream: + raise CLIError("Can't stream completions with n>1 with the current CLI") + + make_request = partial( + get_client().completions.create, + n=args.n, + echo=args.echo, + stop=args.stop, + user=args.user, + model=args.model, + top_p=args.top_p, + prompt=args.prompt, + suffix=args.suffix, + best_of=args.best_of, + logprobs=args.logprobs, + max_tokens=args.max_tokens, + temperature=args.temperature, + presence_penalty=args.presence_penalty, + frequency_penalty=args.frequency_penalty, + ) + + if args.stream: + return CLICompletions._stream_create( + # mypy doesn't understand the `partial` function but pyright does + cast( + Stream[Completion], make_request(stream=True) + ) # pyright: ignore[reportUnnecessaryCast] + ) + + return CLICompletions._create(make_request()) + + @staticmethod + def _create(completion: Completion) -> None: + should_print_header = len(completion.choices) > 1 + for choice in completion.choices: + if should_print_header: + sys.stdout.write("===== Completion {} =====\n".format(choice.index)) + + sys.stdout.write(choice.text) + + if should_print_header or not choice.text.endswith("\n"): + sys.stdout.write("\n") + + sys.stdout.flush() + + @staticmethod + def _stream_create(stream: Stream[Completion]) -> None: + for completion in stream: + should_print_header = len(completion.choices) > 1 + for choice in sorted(completion.choices, key=lambda c: c.index): + if should_print_header: + sys.stdout.write( + "===== Chat Completion {} =====\n".format(choice.index) + ) + + sys.stdout.write(choice.text) + + if should_print_header: + sys.stdout.write("\n") + + sys.stdout.flush() + + sys.stdout.write("\n") diff --git a/portkey_ai/_vendor/openai/cli/_api/files.py b/portkey_ai/_vendor/openai/cli/_api/files.py new file mode 100644 index 00000000..5f3631b2 --- /dev/null +++ b/portkey_ai/_vendor/openai/cli/_api/files.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, cast +from argparse import ArgumentParser + +from .._utils import get_client, print_model +from .._models import BaseModel +from .._progress import BufferReader + +if TYPE_CHECKING: + from argparse import _SubParsersAction + + +def register(subparser: _SubParsersAction[ArgumentParser]) -> None: + sub = subparser.add_parser("files.create") + + sub.add_argument( + "-f", + "--file", + required=True, + help="File to upload", + ) + sub.add_argument( + "-p", + "--purpose", + help="Why are you uploading this file? 
(see https://platform.openai.com/docs/api-reference/ for purposes)", + required=True, + ) + sub.set_defaults(func=CLIFile.create, args_model=CLIFileCreateArgs) + + sub = subparser.add_parser("files.retrieve") + sub.add_argument("-i", "--id", required=True, help="The files ID") + sub.set_defaults(func=CLIFile.get, args_model=CLIFileCreateArgs) + + sub = subparser.add_parser("files.delete") + sub.add_argument("-i", "--id", required=True, help="The files ID") + sub.set_defaults(func=CLIFile.delete, args_model=CLIFileCreateArgs) + + sub = subparser.add_parser("files.list") + sub.set_defaults(func=CLIFile.list) + + +class CLIFileIDArgs(BaseModel): + id: str + + +class CLIFileCreateArgs(BaseModel): + file: str + purpose: str + + +class CLIFile: + @staticmethod + def create(args: CLIFileCreateArgs) -> None: + with open(args.file, "rb") as file_reader: + buffer_reader = BufferReader(file_reader.read(), desc="Upload progress") + + file = get_client().files.create( + file=(args.file, buffer_reader), + # casts required because the API is typed for enums + # but we don't want to validate that here for forwards-compat + purpose=cast(Any, args.purpose), + ) + print_model(file) + + @staticmethod + def get(args: CLIFileIDArgs) -> None: + file = get_client().files.retrieve(file_id=args.id) + print_model(file) + + @staticmethod + def delete(args: CLIFileIDArgs) -> None: + file = get_client().files.delete(file_id=args.id) + print_model(file) + + @staticmethod + def list() -> None: + files = get_client().files.list() + for file in files: + print_model(file) diff --git a/portkey_ai/_vendor/openai/cli/_api/image.py b/portkey_ai/_vendor/openai/cli/_api/image.py new file mode 100644 index 00000000..b7938a41 --- /dev/null +++ b/portkey_ai/_vendor/openai/cli/_api/image.py @@ -0,0 +1,149 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, cast +from argparse import ArgumentParser + +from .._utils import get_client, print_model +from ..._types import NOT_GIVEN, NotGiven, NotGivenOr +from .._models import BaseModel +from .._progress import BufferReader + +if TYPE_CHECKING: + from argparse import _SubParsersAction + + +def register(subparser: _SubParsersAction[ArgumentParser]) -> None: + sub = subparser.add_parser("images.generate") + sub.add_argument("-m", "--model", type=str) + sub.add_argument("-p", "--prompt", type=str, required=True) + sub.add_argument("-n", "--num-images", type=int, default=1) + sub.add_argument( + "-s", "--size", type=str, default="1024x1024", help="Size of the output image" + ) + sub.add_argument("--response-format", type=str, default="url") + sub.set_defaults(func=CLIImage.create, args_model=CLIImageCreateArgs) + + sub = subparser.add_parser("images.edit") + sub.add_argument("-m", "--model", type=str) + sub.add_argument("-p", "--prompt", type=str, required=True) + sub.add_argument("-n", "--num-images", type=int, default=1) + sub.add_argument( + "-I", + "--image", + type=str, + required=True, + help="Image to modify. Should be a local path and a PNG encoded image.", + ) + sub.add_argument( + "-s", "--size", type=str, default="1024x1024", help="Size of the output image" + ) + sub.add_argument("--response-format", type=str, default="url") + sub.add_argument( + "-M", + "--mask", + type=str, + required=False, + help="Path to a mask image. It should be the same size as the image you're editing and a RGBA PNG image. 
The Alpha channel acts as the mask.", + ) + sub.set_defaults(func=CLIImage.edit, args_model=CLIImageEditArgs) + + sub = subparser.add_parser("images.create_variation") + sub.add_argument("-m", "--model", type=str) + sub.add_argument("-n", "--num-images", type=int, default=1) + sub.add_argument( + "-I", + "--image", + type=str, + required=True, + help="Image to modify. Should be a local path and a PNG encoded image.", + ) + sub.add_argument( + "-s", "--size", type=str, default="1024x1024", help="Size of the output image" + ) + sub.add_argument("--response-format", type=str, default="url") + sub.set_defaults( + func=CLIImage.create_variation, args_model=CLIImageCreateVariationArgs + ) + + +class CLIImageCreateArgs(BaseModel): + prompt: str + num_images: int + size: str + response_format: str + model: NotGivenOr[str] = NOT_GIVEN + + +class CLIImageCreateVariationArgs(BaseModel): + image: str + num_images: int + size: str + response_format: str + model: NotGivenOr[str] = NOT_GIVEN + + +class CLIImageEditArgs(BaseModel): + image: str + num_images: int + size: str + response_format: str + prompt: str + mask: NotGivenOr[str] = NOT_GIVEN + model: NotGivenOr[str] = NOT_GIVEN + + +class CLIImage: + @staticmethod + def create(args: CLIImageCreateArgs) -> None: + image = get_client().images.generate( + model=args.model, + prompt=args.prompt, + n=args.num_images, + # casts required because the API is typed for enums + # but we don't want to validate that here for forwards-compat + size=cast(Any, args.size), + response_format=cast(Any, args.response_format), + ) + print_model(image) + + @staticmethod + def create_variation(args: CLIImageCreateVariationArgs) -> None: + with open(args.image, "rb") as file_reader: + buffer_reader = BufferReader(file_reader.read(), desc="Upload progress") + + image = get_client().images.create_variation( + model=args.model, + image=("image", buffer_reader), + n=args.num_images, + # casts required because the API is typed for enums + # but we don't want to validate that here for forwards-compat + size=cast(Any, args.size), + response_format=cast(Any, args.response_format), + ) + print_model(image) + + @staticmethod + def edit(args: CLIImageEditArgs) -> None: + with open(args.image, "rb") as file_reader: + buffer_reader = BufferReader( + file_reader.read(), desc="Image upload progress" + ) + + if isinstance(args.mask, NotGiven): + mask: NotGivenOr[BufferReader] = NOT_GIVEN + else: + with open(args.mask, "rb") as file_reader: + mask = BufferReader(file_reader.read(), desc="Mask progress") + + image = get_client().images.edit( + model=args.model, + prompt=args.prompt, + image=("image", buffer_reader), + n=args.num_images, + mask=("mask", mask) if not isinstance(mask, NotGiven) else mask, + # casts required because the API is typed for enums + # but we don't want to validate that here for forwards-compat + size=cast(Any, args.size), + response_format=cast(Any, args.response_format), + ) + print_model(image) diff --git a/portkey_ai/_vendor/openai/cli/_api/models.py b/portkey_ai/_vendor/openai/cli/_api/models.py new file mode 100644 index 00000000..017218fa --- /dev/null +++ b/portkey_ai/_vendor/openai/cli/_api/models.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING +from argparse import ArgumentParser + +from .._utils import get_client, print_model +from .._models import BaseModel + +if TYPE_CHECKING: + from argparse import _SubParsersAction + + +def register(subparser: _SubParsersAction[ArgumentParser]) -> None: + sub = 
subparser.add_parser("models.list") + sub.set_defaults(func=CLIModels.list) + + sub = subparser.add_parser("models.retrieve") + sub.add_argument("-i", "--id", required=True, help="The model ID") + sub.set_defaults(func=CLIModels.get, args_model=CLIModelIDArgs) + + sub = subparser.add_parser("models.delete") + sub.add_argument("-i", "--id", required=True, help="The model ID") + sub.set_defaults(func=CLIModels.delete, args_model=CLIModelIDArgs) + + +class CLIModelIDArgs(BaseModel): + id: str + + +class CLIModels: + @staticmethod + def get(args: CLIModelIDArgs) -> None: + model = get_client().models.retrieve(model=args.id) + print_model(model) + + @staticmethod + def delete(args: CLIModelIDArgs) -> None: + model = get_client().models.delete(model=args.id) + print_model(model) + + @staticmethod + def list() -> None: + models = get_client().models.list() + for model in models: + print_model(model) diff --git a/portkey_ai/_vendor/openai/cli/_cli.py b/portkey_ai/_vendor/openai/cli/_cli.py new file mode 100644 index 00000000..6c3dbdbb --- /dev/null +++ b/portkey_ai/_vendor/openai/cli/_cli.py @@ -0,0 +1,238 @@ +from __future__ import annotations + +import sys +import logging +import argparse +from typing import Any, List, Type, Optional +from typing_extensions import ClassVar + +import httpx +import pydantic + +from ... import openai + +from . import _tools +from .. import _ApiType, __version__ +from ._api import register_commands +from ._utils import can_use_http2 +from .._types import ProxiesDict +from ._errors import CLIError, display_error +from .._compat import PYDANTIC_V2, ConfigDict, model_parse +from .._models import BaseModel +from .._exceptions import APIError + +logger = logging.getLogger() +formatter = logging.Formatter("[%(asctime)s] %(message)s") +handler = logging.StreamHandler(sys.stderr) +handler.setFormatter(formatter) +logger.addHandler(handler) + + +class Arguments(BaseModel): + if PYDANTIC_V2: + model_config: ClassVar[ConfigDict] = ConfigDict( + extra="ignore", + ) + else: + + class Config(pydantic.BaseConfig): # type: ignore + extra: Any = pydantic.Extra.ignore # type: ignore + + verbosity: int + version: Optional[str] = None + + api_key: Optional[str] + api_base: Optional[str] + organization: Optional[str] + proxy: Optional[List[str]] + api_type: Optional[_ApiType] = None + api_version: Optional[str] = None + + # azure + azure_endpoint: Optional[str] = None + azure_ad_token: Optional[str] = None + + # internal, set by subparsers to parse their specific args + args_model: Optional[Type[BaseModel]] = None + + # internal, used so that subparsers can forward unknown arguments + unknown_args: List[str] = [] + allow_unknown_args: bool = False + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description=None, prog="openai") + parser.add_argument( + "-v", + "--verbose", + action="count", + dest="verbosity", + default=0, + help="Set verbosity.", + ) + parser.add_argument("-b", "--api-base", help="What API base url to use.") + parser.add_argument("-k", "--api-key", help="What API key to use.") + parser.add_argument("-p", "--proxy", nargs="+", help="What proxy to use.") + parser.add_argument( + "-o", + "--organization", + help="Which organization to run as (will use your default organization if not specified)", + ) + parser.add_argument( + "-t", + "--api-type", + type=str, + choices=("openai", "azure"), + help="The backend API to call, must be `openai` or `azure`", + ) + parser.add_argument( + "--api-version", + help="The Azure API version, e.g. 
'https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning'", + ) + + # azure + parser.add_argument( + "--azure-endpoint", + help="The Azure endpoint, e.g. 'https://endpoint.openai.azure.com'", + ) + parser.add_argument( + "--azure-ad-token", + help="A token from Azure Active Directory, https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id", + ) + + # prints the package version + parser.add_argument( + "-V", + "--version", + action="version", + version="%(prog)s " + __version__, + ) + + def help() -> None: + parser.print_help() + + parser.set_defaults(func=help) + + subparsers = parser.add_subparsers() + sub_api = subparsers.add_parser("api", help="Direct API calls") + + register_commands(sub_api) + + sub_tools = subparsers.add_parser("tools", help="Client side tools for convenience") + _tools.register_commands(sub_tools, subparsers) + + return parser + + +def main() -> int: + try: + _main() + except (APIError, CLIError, pydantic.ValidationError) as err: + display_error(err) + return 1 + except KeyboardInterrupt: + sys.stderr.write("\n") + return 1 + return 0 + + +def _parse_args( + parser: argparse.ArgumentParser, +) -> tuple[argparse.Namespace, Arguments, list[str]]: + # argparse by default will strip out the `--` but we want to keep it for unknown arguments + if "--" in sys.argv: + idx = sys.argv.index("--") + known_args = sys.argv[1:idx] + unknown_args = sys.argv[idx:] + else: + known_args = sys.argv[1:] + unknown_args = [] + + parsed, remaining_unknown = parser.parse_known_args(known_args) + + # append any remaining unknown arguments from the initial parsing + remaining_unknown.extend(unknown_args) + + args = model_parse(Arguments, vars(parsed)) + if not args.allow_unknown_args: + # we have to parse twice to ensure any unknown arguments + # result in an error if that behaviour is desired + parser.parse_args() + + return parsed, args, remaining_unknown + + +def _main() -> None: + parser = _build_parser() + parsed, args, unknown = _parse_args(parser) + + if args.verbosity != 0: + sys.stderr.write("Warning: --verbosity isn't supported yet\n") + + proxies: ProxiesDict = {} + if args.proxy is not None: + for proxy in args.proxy: + key = "https://" if proxy.startswith("https") else "http://" + if key in proxies: + raise CLIError( + f"Multiple {key} proxies given - only the last one would be used" + ) + + proxies[key] = proxy + + http_client = httpx.Client( + proxies=proxies or None, + http2=can_use_http2(), + ) + openai.http_client = http_client + + if args.organization: + openai.organization = args.organization + + if args.api_key: + openai.api_key = args.api_key + + if args.api_base: + openai.base_url = args.api_base + + # azure + if args.api_type is not None: + openai.api_type = args.api_type + + if args.azure_endpoint is not None: + openai.azure_endpoint = args.azure_endpoint + + if args.api_version is not None: + openai.api_version = args.api_version + + if args.azure_ad_token is not None: + openai.azure_ad_token = args.azure_ad_token + + try: + if args.args_model: + parsed.func( + model_parse( + args.args_model, + { + **{ + # we omit None values so that they can be defaulted to `NotGiven` + # and we'll strip it from the API request + key: value + for key, value in vars(parsed).items() + if value is not None + }, + "unknown_args": unknown, + }, + ) + ) + else: + parsed.func() + finally: + try: + http_client.close() + except Exception: + pass + + +if __name__ == "__main__": + sys.exit(main()) diff --git 
a/portkey_ai/_vendor/openai/cli/_errors.py b/portkey_ai/_vendor/openai/cli/_errors.py new file mode 100644 index 00000000..18d5e403 --- /dev/null +++ b/portkey_ai/_vendor/openai/cli/_errors.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import sys + +import pydantic + +from ._utils import Colors, organization_info +from .._exceptions import APIError, OpenAIError + + +class CLIError(OpenAIError): + ... + + +class SilentCLIError(CLIError): + ... + + +def display_error(err: CLIError | APIError | pydantic.ValidationError) -> None: + if isinstance(err, SilentCLIError): + return + + sys.stderr.write( + "{}{}Error:{} {}\n".format(organization_info(), Colors.FAIL, Colors.ENDC, err) + ) diff --git a/portkey_ai/_vendor/openai/cli/_models.py b/portkey_ai/_vendor/openai/cli/_models.py new file mode 100644 index 00000000..b8c7247e --- /dev/null +++ b/portkey_ai/_vendor/openai/cli/_models.py @@ -0,0 +1,19 @@ +from typing import Any +from typing_extensions import ClassVar + +import pydantic + +from .. import _models +from .._compat import PYDANTIC_V2, ConfigDict + + +class BaseModel(_models.BaseModel): + if PYDANTIC_V2: + model_config: ClassVar[ConfigDict] = ConfigDict( + extra="ignore", arbitrary_types_allowed=True + ) + else: + + class Config(pydantic.BaseConfig): # type: ignore + extra: Any = pydantic.Extra.ignore # type: ignore + arbitrary_types_allowed: bool = True diff --git a/portkey_ai/_vendor/openai/cli/_progress.py b/portkey_ai/_vendor/openai/cli/_progress.py new file mode 100644 index 00000000..8a7f2525 --- /dev/null +++ b/portkey_ai/_vendor/openai/cli/_progress.py @@ -0,0 +1,59 @@ +from __future__ import annotations + +import io +from typing import Callable +from typing_extensions import override + + +class CancelledError(Exception): + def __init__(self, msg: str) -> None: + self.msg = msg + super().__init__(msg) + + @override + def __str__(self) -> str: + return self.msg + + __repr__ = __str__ + + +class BufferReader(io.BytesIO): + def __init__(self, buf: bytes = b"", desc: str | None = None) -> None: + super().__init__(buf) + self._len = len(buf) + self._progress = 0 + self._callback = progress(len(buf), desc=desc) + + def __len__(self) -> int: + return self._len + + @override + def read(self, n: int | None = -1) -> bytes: + chunk = io.BytesIO.read(self, n) + self._progress += len(chunk) + + try: + self._callback(self._progress) + except Exception as e: # catches exception from the callback + raise CancelledError("The upload was cancelled: {}".format(e)) from e + + return chunk + + +def progress(total: float, desc: str | None) -> Callable[[float], None]: + import tqdm + + meter = tqdm.tqdm(total=total, unit_scale=True, desc=desc) + + def incr(progress: float) -> None: + meter.n = progress + if progress == total: + meter.close() + else: + meter.refresh() + + return incr + + +def MB(i: int) -> int: + return int(i // 1024**2) diff --git a/portkey_ai/_vendor/openai/cli/_tools/__init__.py b/portkey_ai/_vendor/openai/cli/_tools/__init__.py new file mode 100644 index 00000000..56a0260a --- /dev/null +++ b/portkey_ai/_vendor/openai/cli/_tools/__init__.py @@ -0,0 +1 @@ +from ._main import register_commands as register_commands diff --git a/portkey_ai/_vendor/openai/cli/_tools/_main.py b/portkey_ai/_vendor/openai/cli/_tools/_main.py new file mode 100644 index 00000000..97481dbb --- /dev/null +++ b/portkey_ai/_vendor/openai/cli/_tools/_main.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING +from argparse import ArgumentParser + +from . 
import migrate, fine_tunes + +if TYPE_CHECKING: + from argparse import _SubParsersAction + + +def register_commands( + parser: ArgumentParser, subparser: _SubParsersAction[ArgumentParser] +) -> None: + migrate.register(subparser) + + namespaced = parser.add_subparsers( + title="Tools", help="Convenience client side tools" + ) + + fine_tunes.register(namespaced) diff --git a/portkey_ai/_vendor/openai/cli/_tools/fine_tunes.py b/portkey_ai/_vendor/openai/cli/_tools/fine_tunes.py new file mode 100644 index 00000000..2128b889 --- /dev/null +++ b/portkey_ai/_vendor/openai/cli/_tools/fine_tunes.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +import sys +from typing import TYPE_CHECKING +from argparse import ArgumentParser + +from .._models import BaseModel +from ...lib._validators import ( + get_validators, + write_out_file, + read_any_format, + apply_validators, + apply_necessary_remediation, +) + +if TYPE_CHECKING: + from argparse import _SubParsersAction + + +def register(subparser: _SubParsersAction[ArgumentParser]) -> None: + sub = subparser.add_parser("fine_tunes.prepare_data") + sub.add_argument( + "-f", + "--file", + required=True, + help="JSONL, JSON, CSV, TSV, TXT or XLSX file containing prompt-completion examples to be analyzed." + "This should be the local file path.", + ) + sub.add_argument( + "-q", + "--quiet", + required=False, + action="store_true", + help="Auto accepts all suggestions, without asking for user input. To be used within scripts.", + ) + sub.set_defaults(func=prepare_data, args_model=PrepareDataArgs) + + +class PrepareDataArgs(BaseModel): + file: str + + quiet: bool + + +def prepare_data(args: PrepareDataArgs) -> None: + sys.stdout.write("Analyzing...\n") + fname = args.file + auto_accept = args.quiet + df, remediation = read_any_format(fname) + apply_necessary_remediation(None, remediation) + + validators = get_validators() + + assert df is not None + + apply_validators( + df, + fname, + remediation, + validators, + auto_accept, + write_out_file_func=write_out_file, + ) diff --git a/portkey_ai/_vendor/openai/cli/_tools/migrate.py b/portkey_ai/_vendor/openai/cli/_tools/migrate.py new file mode 100644 index 00000000..4e04b0c1 --- /dev/null +++ b/portkey_ai/_vendor/openai/cli/_tools/migrate.py @@ -0,0 +1,185 @@ +from __future__ import annotations + +import os +import sys +import json +import shutil +import tarfile +import platform +import subprocess +from typing import TYPE_CHECKING, List +from pathlib import Path +from argparse import ArgumentParser + +import httpx + +from .._errors import CLIError, SilentCLIError +from .._models import BaseModel + +if TYPE_CHECKING: + from argparse import _SubParsersAction + + +def register(subparser: _SubParsersAction[ArgumentParser]) -> None: + sub = subparser.add_parser("migrate") + sub.set_defaults(func=migrate, args_model=MigrateArgs, allow_unknown_args=True) + + sub = subparser.add_parser("grit") + sub.set_defaults(func=grit, args_model=GritArgs, allow_unknown_args=True) + + +class GritArgs(BaseModel): + # internal + unknown_args: List[str] = [] + + +def grit(args: GritArgs) -> None: + grit_path = install() + + try: + subprocess.check_call([grit_path, *args.unknown_args]) + except subprocess.CalledProcessError: + # stdout and stderr are forwarded by subprocess so an error will already + # have been displayed + raise SilentCLIError() from None + + +class MigrateArgs(BaseModel): + # internal + unknown_args: List[str] = [] + + +def migrate(args: MigrateArgs) -> None: + grit_path = install() + + try: + 
subprocess.check_call([grit_path, "apply", "openai", *args.unknown_args]) + except subprocess.CalledProcessError: + # stdout and stderr are forwarded by subprocess so an error will already + # have been displayed + raise SilentCLIError() from None + + +# handles downloading the Grit CLI until they provide their own PyPi package + +KEYGEN_ACCOUNT = "custodian-dev" + + +def _cache_dir() -> Path: + xdg = os.environ.get("XDG_CACHE_HOME") + if xdg is not None: + return Path(xdg) + + return Path.home() / ".cache" + + +def _debug(message: str) -> None: + if not os.environ.get("DEBUG"): + return + + sys.stdout.write(f"[DEBUG]: {message}\n") + + +def install() -> Path: + """Installs the Grit CLI and returns the location of the binary""" + if sys.platform == "win32": + raise CLIError("Windows is not supported yet in the migration CLI") + + platform = "macos" if sys.platform == "darwin" else "linux" + + dir_name = _cache_dir() / "openai-python" + install_dir = dir_name / ".install" + target_dir = install_dir / "bin" + + target_path = target_dir / "marzano" + temp_file = target_dir / "marzano.tmp" + + if target_path.exists(): + _debug(f"{target_path} already exists") + sys.stdout.flush() + return target_path + + _debug(f"Using Grit CLI path: {target_path}") + + target_dir.mkdir(parents=True, exist_ok=True) + + if temp_file.exists(): + temp_file.unlink() + + arch = _get_arch() + _debug(f"Using architecture {arch}") + + file_name = f"marzano-{platform}-{arch}" + meta_url = ( + f"https://api.keygen.sh/v1/accounts/{KEYGEN_ACCOUNT}/artifacts/{file_name}" + ) + + sys.stdout.write(f"Retrieving Grit CLI metadata from {meta_url}\n") + with httpx.Client() as client: + response = client.get(meta_url) # pyright: ignore[reportUnknownMemberType] + + data = response.json() + errors = data.get("errors") + if errors: + for error in errors: + sys.stdout.write(f"{error}\n") + + raise CLIError("Could not locate Grit CLI binary - see above errors") + + write_manifest( + install_dir, data["data"]["relationships"]["release"]["data"]["id"] + ) + + link = data["data"]["links"]["redirect"] + _debug(f"Redirect URL {link}") + + download_response = client.get(link) # pyright: ignore[reportUnknownMemberType] + with open(temp_file, "wb") as file: + for chunk in download_response.iter_bytes(): + file.write(chunk) + + unpacked_dir = target_dir / "cli-bin" + unpacked_dir.mkdir(parents=True, exist_ok=True) + + with tarfile.open(temp_file, "r:gz") as archive: + archive.extractall(unpacked_dir, filter="data") + + for item in unpacked_dir.iterdir(): + item.rename(target_dir / item.name) + + shutil.rmtree(unpacked_dir) + os.remove(temp_file) + os.chmod(target_path, 0o755) + + sys.stdout.flush() + + return target_path + + +def _get_arch() -> str: + architecture = platform.machine().lower() + + # Map the architecture names to Node.js equivalents + arch_map = { + "x86_64": "x64", + "amd64": "x64", + "armv7l": "arm", + "aarch64": "arm64", + } + + return arch_map.get(architecture, architecture) + + +def write_manifest(install_path: Path, release: str) -> None: + manifest = { + "installPath": str(install_path), + "binaries": { + "marzano": { + "name": "marzano", + "release": release, + }, + }, + } + manifest_path = Path(install_path) / "manifests.json" + with open(manifest_path, "w") as f: + json.dump(manifest, f, indent=2) diff --git a/portkey_ai/_vendor/openai/cli/_utils.py b/portkey_ai/_vendor/openai/cli/_utils.py new file mode 100644 index 00000000..1d9415a6 --- /dev/null +++ b/portkey_ai/_vendor/openai/cli/_utils.py @@ -0,0 +1,45 @@ +from 
__future__ import annotations + +import sys + +from ... import openai + +from .. import OpenAI, _load_client +from .._compat import model_json +from .._models import BaseModel + + +class Colors: + HEADER = "\033[95m" + OKBLUE = "\033[94m" + OKGREEN = "\033[92m" + WARNING = "\033[93m" + FAIL = "\033[91m" + ENDC = "\033[0m" + BOLD = "\033[1m" + UNDERLINE = "\033[4m" + + +def get_client() -> OpenAI: + return _load_client() + + +def organization_info() -> str: + organization = openai.organization + if organization is not None: + return "[organization={}] ".format(organization) + + return "" + + +def print_model(model: BaseModel) -> None: + sys.stdout.write(model_json(model, indent=2) + "\n") + + +def can_use_http2() -> bool: + try: + import h2 # type: ignore # noqa + except ImportError: + return False + + return True diff --git a/portkey_ai/_vendor/openai/pagination.py b/portkey_ai/_vendor/openai/pagination.py new file mode 100644 index 00000000..82936382 --- /dev/null +++ b/portkey_ai/_vendor/openai/pagination.py @@ -0,0 +1,107 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Any, List, Generic, TypeVar, Optional, cast +from typing_extensions import Protocol, override, runtime_checkable + +from ._base_client import BasePage, PageInfo, BaseSyncPage, BaseAsyncPage + +__all__ = ["SyncPage", "AsyncPage", "SyncCursorPage", "AsyncCursorPage"] + +_T = TypeVar("_T") + + +@runtime_checkable +class CursorPageItem(Protocol): + id: Optional[str] + + +class SyncPage(BaseSyncPage[_T], BasePage[_T], Generic[_T]): + """Note: no pagination actually occurs yet, this is for forwards-compatibility.""" + + data: List[_T] + object: str + + @override + def _get_page_items(self) -> List[_T]: + data = self.data + if not data: + return [] + return data + + @override + def next_page_info(self) -> None: + """ + This page represents a response that isn't actually paginated at the API level + so there will never be a next page. + """ + return None + + +class AsyncPage(BaseAsyncPage[_T], BasePage[_T], Generic[_T]): + """Note: no pagination actually occurs yet, this is for forwards-compatibility.""" + + data: List[_T] + object: str + + @override + def _get_page_items(self) -> List[_T]: + data = self.data + if not data: + return [] + return data + + @override + def next_page_info(self) -> None: + """ + This page represents a response that isn't actually paginated at the API level + so there will never be a next page. 
+ """ + return None + + +class SyncCursorPage(BaseSyncPage[_T], BasePage[_T], Generic[_T]): + data: List[_T] + + @override + def _get_page_items(self) -> List[_T]: + data = self.data + if not data: + return [] + return data + + @override + def next_page_info(self) -> Optional[PageInfo]: + data = self.data + if not data: + return None + + item = cast(Any, data[-1]) + if not isinstance(item, CursorPageItem) or item.id is None: + # TODO emit warning log + return None + + return PageInfo(params={"after": item.id}) + + +class AsyncCursorPage(BaseAsyncPage[_T], BasePage[_T], Generic[_T]): + data: List[_T] + + @override + def _get_page_items(self) -> List[_T]: + data = self.data + if not data: + return [] + return data + + @override + def next_page_info(self) -> Optional[PageInfo]: + data = self.data + if not data: + return None + + item = cast(Any, data[-1]) + if not isinstance(item, CursorPageItem) or item.id is None: + # TODO emit warning log + return None + + return PageInfo(params={"after": item.id}) diff --git a/portkey_ai/_vendor/openai/py.typed b/portkey_ai/_vendor/openai/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/portkey_ai/_vendor/openai/resources/__init__.py b/portkey_ai/_vendor/openai/resources/__init__.py new file mode 100644 index 00000000..ecae4243 --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/__init__.py @@ -0,0 +1,159 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .beta import ( + Beta, + AsyncBeta, + BetaWithRawResponse, + AsyncBetaWithRawResponse, + BetaWithStreamingResponse, + AsyncBetaWithStreamingResponse, +) +from .chat import ( + Chat, + AsyncChat, + ChatWithRawResponse, + AsyncChatWithRawResponse, + ChatWithStreamingResponse, + AsyncChatWithStreamingResponse, +) +from .audio import ( + Audio, + AsyncAudio, + AudioWithRawResponse, + AsyncAudioWithRawResponse, + AudioWithStreamingResponse, + AsyncAudioWithStreamingResponse, +) +from .files import ( + Files, + AsyncFiles, + FilesWithRawResponse, + AsyncFilesWithRawResponse, + FilesWithStreamingResponse, + AsyncFilesWithStreamingResponse, +) +from .images import ( + Images, + AsyncImages, + ImagesWithRawResponse, + AsyncImagesWithRawResponse, + ImagesWithStreamingResponse, + AsyncImagesWithStreamingResponse, +) +from .models import ( + Models, + AsyncModels, + ModelsWithRawResponse, + AsyncModelsWithRawResponse, + ModelsWithStreamingResponse, + AsyncModelsWithStreamingResponse, +) +from .batches import ( + Batches, + AsyncBatches, + BatchesWithRawResponse, + AsyncBatchesWithRawResponse, + BatchesWithStreamingResponse, + AsyncBatchesWithStreamingResponse, +) +from .embeddings import ( + Embeddings, + AsyncEmbeddings, + EmbeddingsWithRawResponse, + AsyncEmbeddingsWithRawResponse, + EmbeddingsWithStreamingResponse, + AsyncEmbeddingsWithStreamingResponse, +) +from .completions import ( + Completions, + AsyncCompletions, + CompletionsWithRawResponse, + AsyncCompletionsWithRawResponse, + CompletionsWithStreamingResponse, + AsyncCompletionsWithStreamingResponse, +) +from .fine_tuning import ( + FineTuning, + AsyncFineTuning, + FineTuningWithRawResponse, + AsyncFineTuningWithRawResponse, + FineTuningWithStreamingResponse, + AsyncFineTuningWithStreamingResponse, +) +from .moderations import ( + Moderations, + AsyncModerations, + ModerationsWithRawResponse, + AsyncModerationsWithRawResponse, + ModerationsWithStreamingResponse, + AsyncModerationsWithStreamingResponse, +) + +__all__ = [ + "Completions", + "AsyncCompletions", + 
"CompletionsWithRawResponse", + "AsyncCompletionsWithRawResponse", + "CompletionsWithStreamingResponse", + "AsyncCompletionsWithStreamingResponse", + "Chat", + "AsyncChat", + "ChatWithRawResponse", + "AsyncChatWithRawResponse", + "ChatWithStreamingResponse", + "AsyncChatWithStreamingResponse", + "Embeddings", + "AsyncEmbeddings", + "EmbeddingsWithRawResponse", + "AsyncEmbeddingsWithRawResponse", + "EmbeddingsWithStreamingResponse", + "AsyncEmbeddingsWithStreamingResponse", + "Files", + "AsyncFiles", + "FilesWithRawResponse", + "AsyncFilesWithRawResponse", + "FilesWithStreamingResponse", + "AsyncFilesWithStreamingResponse", + "Images", + "AsyncImages", + "ImagesWithRawResponse", + "AsyncImagesWithRawResponse", + "ImagesWithStreamingResponse", + "AsyncImagesWithStreamingResponse", + "Audio", + "AsyncAudio", + "AudioWithRawResponse", + "AsyncAudioWithRawResponse", + "AudioWithStreamingResponse", + "AsyncAudioWithStreamingResponse", + "Moderations", + "AsyncModerations", + "ModerationsWithRawResponse", + "AsyncModerationsWithRawResponse", + "ModerationsWithStreamingResponse", + "AsyncModerationsWithStreamingResponse", + "Models", + "AsyncModels", + "ModelsWithRawResponse", + "AsyncModelsWithRawResponse", + "ModelsWithStreamingResponse", + "AsyncModelsWithStreamingResponse", + "FineTuning", + "AsyncFineTuning", + "FineTuningWithRawResponse", + "AsyncFineTuningWithRawResponse", + "FineTuningWithStreamingResponse", + "AsyncFineTuningWithStreamingResponse", + "Beta", + "AsyncBeta", + "BetaWithRawResponse", + "AsyncBetaWithRawResponse", + "BetaWithStreamingResponse", + "AsyncBetaWithStreamingResponse", + "Batches", + "AsyncBatches", + "BatchesWithRawResponse", + "AsyncBatchesWithRawResponse", + "BatchesWithStreamingResponse", + "AsyncBatchesWithStreamingResponse", +] diff --git a/portkey_ai/_vendor/openai/resources/audio/__init__.py b/portkey_ai/_vendor/openai/resources/audio/__init__.py new file mode 100644 index 00000000..7da1d2db --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/audio/__init__.py @@ -0,0 +1,61 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .audio import ( + Audio, + AsyncAudio, + AudioWithRawResponse, + AsyncAudioWithRawResponse, + AudioWithStreamingResponse, + AsyncAudioWithStreamingResponse, +) +from .speech import ( + Speech, + AsyncSpeech, + SpeechWithRawResponse, + AsyncSpeechWithRawResponse, + SpeechWithStreamingResponse, + AsyncSpeechWithStreamingResponse, +) +from .translations import ( + Translations, + AsyncTranslations, + TranslationsWithRawResponse, + AsyncTranslationsWithRawResponse, + TranslationsWithStreamingResponse, + AsyncTranslationsWithStreamingResponse, +) +from .transcriptions import ( + Transcriptions, + AsyncTranscriptions, + TranscriptionsWithRawResponse, + AsyncTranscriptionsWithRawResponse, + TranscriptionsWithStreamingResponse, + AsyncTranscriptionsWithStreamingResponse, +) + +__all__ = [ + "Transcriptions", + "AsyncTranscriptions", + "TranscriptionsWithRawResponse", + "AsyncTranscriptionsWithRawResponse", + "TranscriptionsWithStreamingResponse", + "AsyncTranscriptionsWithStreamingResponse", + "Translations", + "AsyncTranslations", + "TranslationsWithRawResponse", + "AsyncTranslationsWithRawResponse", + "TranslationsWithStreamingResponse", + "AsyncTranslationsWithStreamingResponse", + "Speech", + "AsyncSpeech", + "SpeechWithRawResponse", + "AsyncSpeechWithRawResponse", + "SpeechWithStreamingResponse", + "AsyncSpeechWithStreamingResponse", + "Audio", + "AsyncAudio", + "AudioWithRawResponse", + "AsyncAudioWithRawResponse", + "AudioWithStreamingResponse", + "AsyncAudioWithStreamingResponse", +] diff --git a/portkey_ai/_vendor/openai/resources/audio/audio.py b/portkey_ai/_vendor/openai/resources/audio/audio.py new file mode 100644 index 00000000..537ad573 --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/audio/audio.py @@ -0,0 +1,144 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from .speech import ( + Speech, + AsyncSpeech, + SpeechWithRawResponse, + AsyncSpeechWithRawResponse, + SpeechWithStreamingResponse, + AsyncSpeechWithStreamingResponse, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from .translations import ( + Translations, + AsyncTranslations, + TranslationsWithRawResponse, + AsyncTranslationsWithRawResponse, + TranslationsWithStreamingResponse, + AsyncTranslationsWithStreamingResponse, +) +from .transcriptions import ( + Transcriptions, + AsyncTranscriptions, + TranscriptionsWithRawResponse, + AsyncTranscriptionsWithRawResponse, + TranscriptionsWithStreamingResponse, + AsyncTranscriptionsWithStreamingResponse, +) + +__all__ = ["Audio", "AsyncAudio"] + + +class Audio(SyncAPIResource): + @cached_property + def transcriptions(self) -> Transcriptions: + return Transcriptions(self._client) + + @cached_property + def translations(self) -> Translations: + return Translations(self._client) + + @cached_property + def speech(self) -> Speech: + return Speech(self._client) + + @cached_property + def with_raw_response(self) -> AudioWithRawResponse: + return AudioWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AudioWithStreamingResponse: + return AudioWithStreamingResponse(self) + + +class AsyncAudio(AsyncAPIResource): + @cached_property + def transcriptions(self) -> AsyncTranscriptions: + return AsyncTranscriptions(self._client) + + @cached_property + def translations(self) -> AsyncTranslations: + return AsyncTranslations(self._client) + + @cached_property + def speech(self) -> AsyncSpeech: + return AsyncSpeech(self._client) + + @cached_property + def with_raw_response(self) -> AsyncAudioWithRawResponse: + return AsyncAudioWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncAudioWithStreamingResponse: + return AsyncAudioWithStreamingResponse(self) + + +class AudioWithRawResponse: + def __init__(self, audio: Audio) -> None: + self._audio = audio + + @cached_property + def transcriptions(self) -> TranscriptionsWithRawResponse: + return TranscriptionsWithRawResponse(self._audio.transcriptions) + + @cached_property + def translations(self) -> TranslationsWithRawResponse: + return TranslationsWithRawResponse(self._audio.translations) + + @cached_property + def speech(self) -> SpeechWithRawResponse: + return SpeechWithRawResponse(self._audio.speech) + + +class AsyncAudioWithRawResponse: + def __init__(self, audio: AsyncAudio) -> None: + self._audio = audio + + @cached_property + def transcriptions(self) -> AsyncTranscriptionsWithRawResponse: + return AsyncTranscriptionsWithRawResponse(self._audio.transcriptions) + + @cached_property + def translations(self) -> AsyncTranslationsWithRawResponse: + return AsyncTranslationsWithRawResponse(self._audio.translations) + + @cached_property + def speech(self) -> AsyncSpeechWithRawResponse: + return AsyncSpeechWithRawResponse(self._audio.speech) + + +class AudioWithStreamingResponse: + def __init__(self, audio: Audio) -> None: + self._audio = audio + + @cached_property + def transcriptions(self) -> TranscriptionsWithStreamingResponse: + return TranscriptionsWithStreamingResponse(self._audio.transcriptions) + + @cached_property + def translations(self) -> TranslationsWithStreamingResponse: + return TranslationsWithStreamingResponse(self._audio.translations) + + @cached_property + def speech(self) -> SpeechWithStreamingResponse: + return 
SpeechWithStreamingResponse(self._audio.speech) + + +class AsyncAudioWithStreamingResponse: + def __init__(self, audio: AsyncAudio) -> None: + self._audio = audio + + @cached_property + def transcriptions(self) -> AsyncTranscriptionsWithStreamingResponse: + return AsyncTranscriptionsWithStreamingResponse(self._audio.transcriptions) + + @cached_property + def translations(self) -> AsyncTranslationsWithStreamingResponse: + return AsyncTranslationsWithStreamingResponse(self._audio.translations) + + @cached_property + def speech(self) -> AsyncSpeechWithStreamingResponse: + return AsyncSpeechWithStreamingResponse(self._audio.speech) diff --git a/portkey_ai/_vendor/openai/resources/audio/speech.py b/portkey_ai/_vendor/openai/resources/audio/speech.py new file mode 100644 index 00000000..7d33b54f --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/audio/speech.py @@ -0,0 +1,221 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal + +import httpx + +from ... import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import ( + maybe_transform, + async_maybe_transform, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( + StreamedBinaryAPIResponse, + AsyncStreamedBinaryAPIResponse, + to_custom_streamed_response_wrapper, + async_to_custom_streamed_response_wrapper, +) +from ...types.audio import speech_create_params +from ..._base_client import ( + make_request_options, +) + +__all__ = ["Speech", "AsyncSpeech"] + + +class Speech(SyncAPIResource): + @cached_property + def with_raw_response(self) -> SpeechWithRawResponse: + return SpeechWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> SpeechWithStreamingResponse: + return SpeechWithStreamingResponse(self) + + def create( + self, + *, + input: str, + model: Union[str, Literal["tts-1", "tts-1-hd"]], + voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"], + response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] + | NotGiven = NOT_GIVEN, + speed: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> _legacy_response.HttpxBinaryResponseContent: + """ + Generates audio from the input text. + + Args: + input: The text to generate audio for. The maximum length is 4096 characters. + + model: + One of the available [TTS models](https://platform.openai.com/docs/models/tts): + `tts-1` or `tts-1-hd` + + voice: The voice to use when generating the audio. Supported voices are `alloy`, + `echo`, `fable`, `onyx`, `nova`, and `shimmer`. Previews of the voices are + available in the + [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options). + + response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, + `wav`, and `pcm`. + + speed: The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is + the default. 
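The parameters documented above map one-to-one onto a call. A minimal usage sketch (illustrative, not from the patch), assuming a client configured as in the earlier example; the model, voice, and output path are just the documented literals and a placeholder filename:

# Minimal sketch: synthesize speech and stream the binary response to disk via
# the with_streaming_response wrapper defined at the end of this file.
# The import path, API key, and output filename are illustrative assumptions.
from pathlib import Path
from portkey_ai._vendor.openai import OpenAI

client = OpenAI(api_key="sk-placeholder")

with client.audio.speech.with_streaming_response.create(
    model="tts-1",
    voice="alloy",
    input="Hello from the vendored SDK.",
    response_format="mp3",   # one of the documented formats
) as response:
    response.stream_to_file(Path("speech.mp3"))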
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})} + return self._post( + "/audio/speech", + body=maybe_transform( + { + "input": input, + "model": model, + "voice": voice, + "response_format": response_format, + "speed": speed, + }, + speech_create_params.SpeechCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=_legacy_response.HttpxBinaryResponseContent, + ) + + +class AsyncSpeech(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncSpeechWithRawResponse: + return AsyncSpeechWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncSpeechWithStreamingResponse: + return AsyncSpeechWithStreamingResponse(self) + + async def create( + self, + *, + input: str, + model: Union[str, Literal["tts-1", "tts-1-hd"]], + voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"], + response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] + | NotGiven = NOT_GIVEN, + speed: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> _legacy_response.HttpxBinaryResponseContent: + """ + Generates audio from the input text. + + Args: + input: The text to generate audio for. The maximum length is 4096 characters. + + model: + One of the available [TTS models](https://platform.openai.com/docs/models/tts): + `tts-1` or `tts-1-hd` + + voice: The voice to use when generating the audio. Supported voices are `alloy`, + `echo`, `fable`, `onyx`, `nova`, and `shimmer`. Previews of the voices are + available in the + [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options). + + response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, + `wav`, and `pcm`. + + speed: The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is + the default. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})} + return await self._post( + "/audio/speech", + body=await async_maybe_transform( + { + "input": input, + "model": model, + "voice": voice, + "response_format": response_format, + "speed": speed, + }, + speech_create_params.SpeechCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=_legacy_response.HttpxBinaryResponseContent, + ) + + +class SpeechWithRawResponse: + def __init__(self, speech: Speech) -> None: + self._speech = speech + + self.create = _legacy_response.to_raw_response_wrapper( + speech.create, + ) + + +class AsyncSpeechWithRawResponse: + def __init__(self, speech: AsyncSpeech) -> None: + self._speech = speech + + self.create = _legacy_response.async_to_raw_response_wrapper( + speech.create, + ) + + +class SpeechWithStreamingResponse: + def __init__(self, speech: Speech) -> None: + self._speech = speech + + self.create = to_custom_streamed_response_wrapper( + speech.create, + StreamedBinaryAPIResponse, + ) + + +class AsyncSpeechWithStreamingResponse: + def __init__(self, speech: AsyncSpeech) -> None: + self._speech = speech + + self.create = async_to_custom_streamed_response_wrapper( + speech.create, + AsyncStreamedBinaryAPIResponse, + ) diff --git a/portkey_ai/_vendor/openai/resources/audio/transcriptions.py b/portkey_ai/_vendor/openai/resources/audio/transcriptions.py new file mode 100644 index 00000000..8d119f43 --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/audio/transcriptions.py @@ -0,0 +1,280 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Mapping, cast +from typing_extensions import Literal + +import httpx + +from ... 
import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes +from ..._utils import ( + extract_files, + maybe_transform, + deepcopy_minimal, + async_maybe_transform, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( + to_streamed_response_wrapper, + async_to_streamed_response_wrapper, +) +from ...types.audio import transcription_create_params +from ..._base_client import ( + make_request_options, +) +from ...types.audio.transcription import Transcription + +__all__ = ["Transcriptions", "AsyncTranscriptions"] + + +class Transcriptions(SyncAPIResource): + @cached_property + def with_raw_response(self) -> TranscriptionsWithRawResponse: + return TranscriptionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> TranscriptionsWithStreamingResponse: + return TranscriptionsWithStreamingResponse(self) + + def create( + self, + *, + file: FileTypes, + model: Union[str, Literal["whisper-1"]], + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] + | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Transcription: + """ + Transcribes audio into the input language. + + Args: + file: + The audio file object (not file name) to transcribe, in one of these formats: + flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + + model: ID of the model to use. Only `whisper-1` (which is powered by our open source + Whisper V2 model) is currently available. + + language: The language of the input audio. Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will + improve accuracy and latency. + + prompt: An optional text to guide the model's style or continue a previous audio + segment. The + [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + should match the audio language. + + response_format: The format of the transcript output, in one of these options: `json`, `text`, + `srt`, `verbose_json`, or `vtt`. + + temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + output more random, while lower values like 0.2 will make it more focused and + deterministic. If set to 0, the model will use + [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. + + timestamp_granularities: The timestamp granularities to populate for this transcription. + `response_format` must be set `verbose_json` to use timestamp granularities. + Either or both of these options are supported: `word`, or `segment`. Note: There + is no additional latency for segment timestamps, but generating word timestamps + incurs additional latency. 
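A minimal sketch (illustrative, not from the patch) of the multipart upload this method performs, assuming a local `audio.mp3` file and the client from the earlier examples; `translations.create`, defined later in this patch, takes the same shape minus `language` and `timestamp_granularities`:

# Minimal sketch: transcribe a local audio file with the vendored client.
# The import path, API key, and file path are illustrative assumptions.
from portkey_ai._vendor.openai import OpenAI

client = OpenAI(api_key="sk-placeholder")

with open("audio.mp3", "rb") as audio_file:
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,                 # file object, not a file name
        language="en",                   # optional ISO-639-1 hint
        response_format="json",          # "verbose_json" is required for timestamp_granularities
    )

print(transcription.text)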
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + body = deepcopy_minimal( + { + "file": file, + "model": model, + "language": language, + "prompt": prompt, + "response_format": response_format, + "temperature": temperature, + "timestamp_granularities": timestamp_granularities, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) + if files: + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = { + "Content-Type": "multipart/form-data", + **(extra_headers or {}), + } + return self._post( + "/audio/transcriptions", + body=maybe_transform( + body, transcription_create_params.TranscriptionCreateParams + ), + files=files, + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Transcription, + ) + + +class AsyncTranscriptions(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncTranscriptionsWithRawResponse: + return AsyncTranscriptionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncTranscriptionsWithStreamingResponse: + return AsyncTranscriptionsWithStreamingResponse(self) + + async def create( + self, + *, + file: FileTypes, + model: Union[str, Literal["whisper-1"]], + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] + | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Transcription: + """ + Transcribes audio into the input language. + + Args: + file: + The audio file object (not file name) to transcribe, in one of these formats: + flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + + model: ID of the model to use. Only `whisper-1` (which is powered by our open source + Whisper V2 model) is currently available. + + language: The language of the input audio. Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will + improve accuracy and latency. + + prompt: An optional text to guide the model's style or continue a previous audio + segment. The + [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + should match the audio language. + + response_format: The format of the transcript output, in one of these options: `json`, `text`, + `srt`, `verbose_json`, or `vtt`. + + temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + output more random, while lower values like 0.2 will make it more focused and + deterministic. 
If set to 0, the model will use + [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. + + timestamp_granularities: The timestamp granularities to populate for this transcription. + `response_format` must be set `verbose_json` to use timestamp granularities. + Either or both of these options are supported: `word`, or `segment`. Note: There + is no additional latency for segment timestamps, but generating word timestamps + incurs additional latency. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + body = deepcopy_minimal( + { + "file": file, + "model": model, + "language": language, + "prompt": prompt, + "response_format": response_format, + "temperature": temperature, + "timestamp_granularities": timestamp_granularities, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) + if files: + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = { + "Content-Type": "multipart/form-data", + **(extra_headers or {}), + } + return await self._post( + "/audio/transcriptions", + body=await async_maybe_transform( + body, transcription_create_params.TranscriptionCreateParams + ), + files=files, + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Transcription, + ) + + +class TranscriptionsWithRawResponse: + def __init__(self, transcriptions: Transcriptions) -> None: + self._transcriptions = transcriptions + + self.create = _legacy_response.to_raw_response_wrapper( + transcriptions.create, + ) + + +class AsyncTranscriptionsWithRawResponse: + def __init__(self, transcriptions: AsyncTranscriptions) -> None: + self._transcriptions = transcriptions + + self.create = _legacy_response.async_to_raw_response_wrapper( + transcriptions.create, + ) + + +class TranscriptionsWithStreamingResponse: + def __init__(self, transcriptions: Transcriptions) -> None: + self._transcriptions = transcriptions + + self.create = to_streamed_response_wrapper( + transcriptions.create, + ) + + +class AsyncTranscriptionsWithStreamingResponse: + def __init__(self, transcriptions: AsyncTranscriptions) -> None: + self._transcriptions = transcriptions + + self.create = async_to_streamed_response_wrapper( + transcriptions.create, + ) diff --git a/portkey_ai/_vendor/openai/resources/audio/translations.py b/portkey_ai/_vendor/openai/resources/audio/translations.py new file mode 100644 index 00000000..5947aa66 --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/audio/translations.py @@ -0,0 +1,246 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Mapping, cast +from typing_extensions import Literal + +import httpx + +from ... 
import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes +from ..._utils import ( + extract_files, + maybe_transform, + deepcopy_minimal, + async_maybe_transform, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( + to_streamed_response_wrapper, + async_to_streamed_response_wrapper, +) +from ...types.audio import translation_create_params +from ..._base_client import ( + make_request_options, +) +from ...types.audio.translation import Translation + +__all__ = ["Translations", "AsyncTranslations"] + + +class Translations(SyncAPIResource): + @cached_property + def with_raw_response(self) -> TranslationsWithRawResponse: + return TranslationsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> TranslationsWithStreamingResponse: + return TranslationsWithStreamingResponse(self) + + def create( + self, + *, + file: FileTypes, + model: Union[str, Literal["whisper-1"]], + prompt: str | NotGiven = NOT_GIVEN, + response_format: str | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Translation: + """ + Translates audio into English. + + Args: + file: The audio file object (not file name) translate, in one of these formats: flac, + mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + + model: ID of the model to use. Only `whisper-1` (which is powered by our open source + Whisper V2 model) is currently available. + + prompt: An optional text to guide the model's style or continue a previous audio + segment. The + [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + should be in English. + + response_format: The format of the transcript output, in one of these options: `json`, `text`, + `srt`, `verbose_json`, or `vtt`. + + temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + output more random, while lower values like 0.2 will make it more focused and + deterministic. If set to 0, the model will use + [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + body = deepcopy_minimal( + { + "file": file, + "model": model, + "prompt": prompt, + "response_format": response_format, + "temperature": temperature, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) + if files: + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
+ # multipart/form-data; boundary=---abc-- + extra_headers = { + "Content-Type": "multipart/form-data", + **(extra_headers or {}), + } + return self._post( + "/audio/translations", + body=maybe_transform( + body, translation_create_params.TranslationCreateParams + ), + files=files, + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Translation, + ) + + +class AsyncTranslations(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncTranslationsWithRawResponse: + return AsyncTranslationsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncTranslationsWithStreamingResponse: + return AsyncTranslationsWithStreamingResponse(self) + + async def create( + self, + *, + file: FileTypes, + model: Union[str, Literal["whisper-1"]], + prompt: str | NotGiven = NOT_GIVEN, + response_format: str | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Translation: + """ + Translates audio into English. + + Args: + file: The audio file object (not file name) translate, in one of these formats: flac, + mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + + model: ID of the model to use. Only `whisper-1` (which is powered by our open source + Whisper V2 model) is currently available. + + prompt: An optional text to guide the model's style or continue a previous audio + segment. The + [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + should be in English. + + response_format: The format of the transcript output, in one of these options: `json`, `text`, + `srt`, `verbose_json`, or `vtt`. + + temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + output more random, while lower values like 0.2 will make it more focused and + deterministic. If set to 0, the model will use + [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + body = deepcopy_minimal( + { + "file": file, + "model": model, + "prompt": prompt, + "response_format": response_format, + "temperature": temperature, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) + if files: + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
+ # multipart/form-data; boundary=---abc-- + extra_headers = { + "Content-Type": "multipart/form-data", + **(extra_headers or {}), + } + return await self._post( + "/audio/translations", + body=await async_maybe_transform( + body, translation_create_params.TranslationCreateParams + ), + files=files, + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Translation, + ) + + +class TranslationsWithRawResponse: + def __init__(self, translations: Translations) -> None: + self._translations = translations + + self.create = _legacy_response.to_raw_response_wrapper( + translations.create, + ) + + +class AsyncTranslationsWithRawResponse: + def __init__(self, translations: AsyncTranslations) -> None: + self._translations = translations + + self.create = _legacy_response.async_to_raw_response_wrapper( + translations.create, + ) + + +class TranslationsWithStreamingResponse: + def __init__(self, translations: Translations) -> None: + self._translations = translations + + self.create = to_streamed_response_wrapper( + translations.create, + ) + + +class AsyncTranslationsWithStreamingResponse: + def __init__(self, translations: AsyncTranslations) -> None: + self._translations = translations + + self.create = async_to_streamed_response_wrapper( + translations.create, + ) diff --git a/portkey_ai/_vendor/openai/resources/batches.py b/portkey_ai/_vendor/openai/resources/batches.py new file mode 100644 index 00000000..f2d6997c --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/batches.py @@ -0,0 +1,501 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Optional +from typing_extensions import Literal + +import httpx + +from .. import _legacy_response +from ..types import batch_list_params, batch_create_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import ( + maybe_transform, + async_maybe_transform, +) +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..pagination import SyncCursorPage, AsyncCursorPage +from ..types.batch import Batch +from .._base_client import ( + AsyncPaginator, + make_request_options, +) + +__all__ = ["Batches", "AsyncBatches"] + + +class Batches(SyncAPIResource): + @cached_property + def with_raw_response(self) -> BatchesWithRawResponse: + return BatchesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> BatchesWithStreamingResponse: + return BatchesWithStreamingResponse(self) + + def create( + self, + *, + completion_window: Literal["24h"], + endpoint: Literal["/v1/chat/completions", "/v1/embeddings"], + input_file_id: str, + metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Creates and executes a batch from an uploaded file of requests + + Args: + completion_window: The time frame within which the batch should be processed. 
Currently only `24h` + is supported. + + endpoint: The endpoint to be used for all requests in the batch. Currently + `/v1/chat/completions` and `/v1/embeddings` are supported. + + input_file_id: The ID of an uploaded file that contains requests for the new batch. + + See [upload file](https://platform.openai.com/docs/api-reference/files/create) + for how to upload a file. + + Your input file must be formatted as a + [JSONL file](https://platform.openai.com/docs/api-reference/batch/requestInput), + and must be uploaded with the purpose `batch`. + + metadata: Optional custom metadata for the batch. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/batches", + body=maybe_transform( + { + "completion_window": completion_window, + "endpoint": endpoint, + "input_file_id": input_file_id, + "metadata": metadata, + }, + batch_create_params.BatchCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Batch, + ) + + def retrieve( + self, + batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Retrieves a batch. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not batch_id: + raise ValueError( + f"Expected a non-empty value for `batch_id` but received {batch_id!r}" + ) + return self._get( + f"/batches/{batch_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Batch, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[Batch]: + """List your organization's batches. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. 
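The `after`/`limit` cursor described above is what the `SyncCursorPage` / `AsyncCursorPage` classes from `pagination.py` earlier in this patch consume: `next_page_info()` takes the `id` of the last item on the page and turns it into `{"after": item.id}`. A minimal iteration sketch (illustrative, not from the patch), assuming the client from the earlier examples:

# Minimal sketch: list batches with automatic cursor pagination. Iterating the
# SyncCursorPage fetches subsequent pages by passing the last item's id as the
# `after` cursor, per pagination.py above. Import path and key are assumptions.
from portkey_ai._vendor.openai import OpenAI

client = OpenAI(api_key="sk-placeholder")

for batch in client.batches.list(limit=20):   # limit applies per page, not overall
    print(batch.id, batch.status)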
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/batches", + page=SyncCursorPage[Batch], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + batch_list_params.BatchListParams, + ), + ), + model=Batch, + ) + + def cancel( + self, + batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Cancels an in-progress batch. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not batch_id: + raise ValueError( + f"Expected a non-empty value for `batch_id` but received {batch_id!r}" + ) + return self._post( + f"/batches/{batch_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Batch, + ) + + +class AsyncBatches(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncBatchesWithRawResponse: + return AsyncBatchesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncBatchesWithStreamingResponse: + return AsyncBatchesWithStreamingResponse(self) + + async def create( + self, + *, + completion_window: Literal["24h"], + endpoint: Literal["/v1/chat/completions", "/v1/embeddings"], + input_file_id: str, + metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Creates and executes a batch from an uploaded file of requests + + Args: + completion_window: The time frame within which the batch should be processed. Currently only `24h` + is supported. + + endpoint: The endpoint to be used for all requests in the batch. Currently + `/v1/chat/completions` and `/v1/embeddings` are supported. + + input_file_id: The ID of an uploaded file that contains requests for the new batch. + + See [upload file](https://platform.openai.com/docs/api-reference/files/create) + for how to upload a file. + + Your input file must be formatted as a + [JSONL file](https://platform.openai.com/docs/api-reference/batch/requestInput), + and must be uploaded with the purpose `batch`. + + metadata: Optional custom metadata for the batch. 
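The async variant mirrors the synchronous `create` above. A minimal sketch (illustrative, not from the patch), assuming `AsyncOpenAI` is exported from the vendored package and that `file-abc123` stands in for a real uploaded JSONL file id:

# Minimal sketch: create a batch with the async client. The import path,
# API key, and input file id are illustrative assumptions.
import asyncio
from portkey_ai._vendor.openai import AsyncOpenAI

async def main() -> None:
    client = AsyncOpenAI(api_key="sk-placeholder")
    batch = await client.batches.create(
        input_file_id="file-abc123",          # placeholder for an uploaded JSONL file
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={"job": "nightly-eval"},     # optional custom metadata
    )
    print(batch.id, batch.status)
    await client.close()

asyncio.run(main())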
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/batches", + body=await async_maybe_transform( + { + "completion_window": completion_window, + "endpoint": endpoint, + "input_file_id": input_file_id, + "metadata": metadata, + }, + batch_create_params.BatchCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Batch, + ) + + async def retrieve( + self, + batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Retrieves a batch. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not batch_id: + raise ValueError( + f"Expected a non-empty value for `batch_id` but received {batch_id!r}" + ) + return await self._get( + f"/batches/{batch_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Batch, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[Batch, AsyncCursorPage[Batch]]: + """List your organization's batches. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/batches", + page=AsyncCursorPage[Batch], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + batch_list_params.BatchListParams, + ), + ), + model=Batch, + ) + + async def cancel( + self, + batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Cancels an in-progress batch. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not batch_id: + raise ValueError( + f"Expected a non-empty value for `batch_id` but received {batch_id!r}" + ) + return await self._post( + f"/batches/{batch_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Batch, + ) + + +class BatchesWithRawResponse: + def __init__(self, batches: Batches) -> None: + self._batches = batches + + self.create = _legacy_response.to_raw_response_wrapper( + batches.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + batches.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + batches.list, + ) + self.cancel = _legacy_response.to_raw_response_wrapper( + batches.cancel, + ) + + +class AsyncBatchesWithRawResponse: + def __init__(self, batches: AsyncBatches) -> None: + self._batches = batches + + self.create = _legacy_response.async_to_raw_response_wrapper( + batches.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + batches.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + batches.list, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + batches.cancel, + ) + + +class BatchesWithStreamingResponse: + def __init__(self, batches: Batches) -> None: + self._batches = batches + + self.create = to_streamed_response_wrapper( + batches.create, + ) + self.retrieve = to_streamed_response_wrapper( + batches.retrieve, + ) + self.list = to_streamed_response_wrapper( + batches.list, + ) + self.cancel = to_streamed_response_wrapper( + batches.cancel, + ) + + +class AsyncBatchesWithStreamingResponse: + def __init__(self, batches: AsyncBatches) -> None: + self._batches = batches + + self.create = async_to_streamed_response_wrapper( + batches.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + batches.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + batches.list, + ) + self.cancel = async_to_streamed_response_wrapper( + batches.cancel, + ) diff --git a/portkey_ai/_vendor/openai/resources/beta/__init__.py b/portkey_ai/_vendor/openai/resources/beta/__init__.py new file mode 100644 index 00000000..01f53387 --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/beta/__init__.py @@ -0,0 +1,61 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
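The Batches/AsyncBatches resource vendored above mirrors the upstream OpenAI client surface. A minimal usage sketch, assuming the vendored package exposes the standard OpenAI client and an API key is available in the environment; the input file ID below is hypothetical, and the JSONL request file is assumed to have been uploaded with purpose "batch":

from openai import OpenAI

client = OpenAI()

# Create a batch from a previously uploaded JSONL request file.
batch = client.batches.create(
    completion_window="24h",
    endpoint="/v1/chat/completions",
    input_file_id="file-abc123",  # hypothetical file ID
    metadata={"run": "nightly-eval"},
)
print(batch.id, batch.status)

# SyncCursorPage is iterable and fetches further pages on demand.
for b in client.batches.list(limit=20):
    print(b.id, b.status)

# Cancel an in-progress batch by ID.
client.batches.cancel(batch.id)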
+ +from .beta import ( + Beta, + AsyncBeta, + BetaWithRawResponse, + AsyncBetaWithRawResponse, + BetaWithStreamingResponse, + AsyncBetaWithStreamingResponse, +) +from .threads import ( + Threads, + AsyncThreads, + ThreadsWithRawResponse, + AsyncThreadsWithRawResponse, + ThreadsWithStreamingResponse, + AsyncThreadsWithStreamingResponse, +) +from .assistants import ( + Assistants, + AsyncAssistants, + AssistantsWithRawResponse, + AsyncAssistantsWithRawResponse, + AssistantsWithStreamingResponse, + AsyncAssistantsWithStreamingResponse, +) +from .vector_stores import ( + VectorStores, + AsyncVectorStores, + VectorStoresWithRawResponse, + AsyncVectorStoresWithRawResponse, + VectorStoresWithStreamingResponse, + AsyncVectorStoresWithStreamingResponse, +) + +__all__ = [ + "VectorStores", + "AsyncVectorStores", + "VectorStoresWithRawResponse", + "AsyncVectorStoresWithRawResponse", + "VectorStoresWithStreamingResponse", + "AsyncVectorStoresWithStreamingResponse", + "Assistants", + "AsyncAssistants", + "AssistantsWithRawResponse", + "AsyncAssistantsWithRawResponse", + "AssistantsWithStreamingResponse", + "AsyncAssistantsWithStreamingResponse", + "Threads", + "AsyncThreads", + "ThreadsWithRawResponse", + "AsyncThreadsWithRawResponse", + "ThreadsWithStreamingResponse", + "AsyncThreadsWithStreamingResponse", + "Beta", + "AsyncBeta", + "BetaWithRawResponse", + "AsyncBetaWithRawResponse", + "BetaWithStreamingResponse", + "AsyncBetaWithStreamingResponse", +] diff --git a/portkey_ai/_vendor/openai/resources/beta/assistants.py b/portkey_ai/_vendor/openai/resources/beta/assistants.py new file mode 100644 index 00000000..89a407fe --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/beta/assistants.py @@ -0,0 +1,937 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable, Optional +from typing_extensions import Literal + +import httpx + +from ... 
import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import ( + maybe_transform, + async_maybe_transform, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( + to_streamed_response_wrapper, + async_to_streamed_response_wrapper, +) +from ...pagination import SyncCursorPage, AsyncCursorPage +from ...types.beta import ( + assistant_list_params, + assistant_create_params, + assistant_update_params, +) +from ..._base_client import ( + AsyncPaginator, + make_request_options, +) +from ...types.beta.assistant import Assistant +from ...types.beta.assistant_deleted import AssistantDeleted +from ...types.beta.assistant_tool_param import AssistantToolParam +from ...types.beta.assistant_response_format_option_param import ( + AssistantResponseFormatOptionParam, +) + +__all__ = ["Assistants", "AsyncAssistants"] + + +class Assistants(SyncAPIResource): + @cached_property + def with_raw_response(self) -> AssistantsWithRawResponse: + return AssistantsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AssistantsWithStreamingResponse: + return AssistantsWithStreamingResponse(self) + + def create( + self, + *, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + ], + description: Optional[str] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + name: Optional[str] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_resources: Optional[assistant_create_params.ToolResources] + | NotGiven = NOT_GIVEN, + tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Assistant: + """ + Create an assistant with a model and instructions. + + Args: + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. + + description: The description of the assistant. The maximum length is 512 characters. + + instructions: The system instructions that the assistant uses. The maximum length is 256,000 + characters. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + name: The name of the assistant. 
The maximum length is 256 characters. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per + assistant. Tools can be of types `code_interpreter`, `file_search`, or + `function`. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + "/assistants", + body=maybe_transform( + { + "model": model, + "description": description, + "instructions": instructions, + "metadata": metadata, + "name": name, + "response_format": response_format, + "temperature": temperature, + "tool_resources": tool_resources, + "tools": tools, + "top_p": top_p, + }, + assistant_create_params.AssistantCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Assistant, + ) + + def retrieve( + self, + assistant_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Assistant: + """ + Retrieves an assistant. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not assistant_id: + raise ValueError( + f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get( + f"/assistants/{assistant_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Assistant, + ) + + def update( + self, + assistant_id: str, + *, + description: Optional[str] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + name: Optional[str] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_resources: Optional[assistant_update_params.ToolResources] + | NotGiven = NOT_GIVEN, + tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Assistant: + """Modifies an assistant. + + Args: + description: The description of the assistant. + + The maximum length is 512 characters. + + instructions: The system instructions that the assistant uses. The maximum length is 256,000 + characters. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. + + name: The name of the assistant. The maximum length is 256 characters. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. 
Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per + assistant. Tools can be of types `code_interpreter`, `file_search`, or + `function`. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not assistant_id: + raise ValueError( + f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/assistants/{assistant_id}", + body=maybe_transform( + { + "description": description, + "instructions": instructions, + "metadata": metadata, + "model": model, + "name": name, + "response_format": response_format, + "temperature": temperature, + "tool_resources": tool_resources, + "tools": tools, + "top_p": top_p, + }, + assistant_update_params.AssistantUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Assistant, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[Assistant]: + """Returns a list of assistants. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include before=obj_foo in order to + fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + "/assistants", + page=SyncCursorPage[Assistant], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + assistant_list_params.AssistantListParams, + ), + ), + model=Assistant, + ) + + def delete( + self, + assistant_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantDeleted: + """ + Delete an assistant. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not assistant_id: + raise ValueError( + f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._delete( + f"/assistants/{assistant_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=AssistantDeleted, + ) + + +class AsyncAssistants(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncAssistantsWithRawResponse: + return AsyncAssistantsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse: + return AsyncAssistantsWithStreamingResponse(self) + + async def create( + self, + *, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + ], + description: Optional[str] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + name: Optional[str] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_resources: Optional[assistant_create_params.ToolResources] + | NotGiven = NOT_GIVEN, + tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Assistant: + """ + Create an assistant with a model and instructions. + + Args: + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. + + description: The description of the assistant. The maximum length is 512 characters. + + instructions: The system instructions that the assistant uses. The maximum length is 256,000 + characters. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + name: The name of the assistant. The maximum length is 256 characters. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per + assistant. Tools can be of types `code_interpreter`, `file_search`, or + `function`. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + "/assistants", + body=await async_maybe_transform( + { + "model": model, + "description": description, + "instructions": instructions, + "metadata": metadata, + "name": name, + "response_format": response_format, + "temperature": temperature, + "tool_resources": tool_resources, + "tools": tools, + "top_p": top_p, + }, + assistant_create_params.AssistantCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Assistant, + ) + + async def retrieve( + self, + assistant_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Assistant: + """ + Retrieves an assistant. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not assistant_id: + raise ValueError( + f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/assistants/{assistant_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Assistant, + ) + + async def update( + self, + assistant_id: str, + *, + description: Optional[str] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + name: Optional[str] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_resources: Optional[assistant_update_params.ToolResources] + | NotGiven = NOT_GIVEN, + tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Assistant: + """Modifies an assistant. + + Args: + description: The description of the assistant. + + The maximum length is 512 characters. + + instructions: The system instructions that the assistant uses. The maximum length is 256,000 + characters. + + metadata: Set of 16 key-value pairs that can be attached to an object. 
This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. + + name: The name of the assistant. The maximum length is 256 characters. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per + assistant. Tools can be of types `code_interpreter`, `file_search`, or + `function`. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not assistant_id: + raise ValueError( + f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/assistants/{assistant_id}", + body=await async_maybe_transform( + { + "description": description, + "instructions": instructions, + "metadata": metadata, + "model": model, + "name": name, + "response_format": response_format, + "temperature": temperature, + "tool_resources": tool_resources, + "tools": tools, + "top_p": top_p, + }, + assistant_update_params.AssistantUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Assistant, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[Assistant, AsyncCursorPage[Assistant]]: + """Returns a list of assistants. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include before=obj_foo in order to + fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + "/assistants", + page=AsyncCursorPage[Assistant], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + assistant_list_params.AssistantListParams, + ), + ), + model=Assistant, + ) + + async def delete( + self, + assistant_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantDeleted: + """ + Delete an assistant. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not assistant_id: + raise ValueError( + f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._delete( + f"/assistants/{assistant_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=AssistantDeleted, + ) + + +class AssistantsWithRawResponse: + def __init__(self, assistants: Assistants) -> None: + self._assistants = assistants + + self.create = _legacy_response.to_raw_response_wrapper( + assistants.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + assistants.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + assistants.update, + ) + self.list = _legacy_response.to_raw_response_wrapper( + assistants.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + assistants.delete, + ) + + +class AsyncAssistantsWithRawResponse: + def __init__(self, assistants: AsyncAssistants) -> None: + self._assistants = assistants + + self.create = _legacy_response.async_to_raw_response_wrapper( + assistants.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + assistants.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + assistants.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + assistants.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + assistants.delete, + ) + + +class AssistantsWithStreamingResponse: + def __init__(self, assistants: Assistants) -> None: + self._assistants = assistants + + self.create = to_streamed_response_wrapper( + assistants.create, + ) + self.retrieve = to_streamed_response_wrapper( + assistants.retrieve, + ) + self.update = to_streamed_response_wrapper( + assistants.update, + ) + self.list = to_streamed_response_wrapper( + assistants.list, + ) + self.delete = to_streamed_response_wrapper( + assistants.delete, + ) + + +class AsyncAssistantsWithStreamingResponse: + def __init__(self, assistants: AsyncAssistants) -> None: + self._assistants = assistants + + self.create = async_to_streamed_response_wrapper( + assistants.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + assistants.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + assistants.update, + ) + self.list = async_to_streamed_response_wrapper( + assistants.list, + ) + self.delete = async_to_streamed_response_wrapper( + assistants.delete, + ) diff --git a/portkey_ai/_vendor/openai/resources/beta/beta.py b/portkey_ai/_vendor/openai/resources/beta/beta.py new file mode 100644 index 00000000..0d980667 --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/beta/beta.py @@ -0,0 +1,146 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
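The Assistants/AsyncAssistants resource vendored above targets the v2 assistants API (note the injected "OpenAI-Beta: assistants=v2" header). A minimal usage sketch, assuming the standard OpenAI client entry point; the model choice, instructions, and assistant names are illustrative only:

from openai import OpenAI

client = OpenAI()

# Create an assistant; with response_format={"type": "json_object"} the prompt
# itself must also ask for JSON, per the docstring above.
assistant = client.beta.assistants.create(
    model="gpt-4-turbo",
    name="Data Extractor",
    instructions="Extract the requested fields and reply as a JSON object.",
    response_format={"type": "json_object"},
    temperature=0.2,
)

# Update, list (cursor-paginated), and delete go through the same resource.
assistant = client.beta.assistants.update(assistant.id, name="Data Extractor v2")
for a in client.beta.assistants.list(order="desc", limit=10):
    print(a.id, a.name)
client.beta.assistants.delete(assistant.id)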
+ +from __future__ import annotations + +from .threads import ( + Threads, + AsyncThreads, + ThreadsWithRawResponse, + AsyncThreadsWithRawResponse, + ThreadsWithStreamingResponse, + AsyncThreadsWithStreamingResponse, +) +from ..._compat import cached_property +from .assistants import ( + Assistants, + AsyncAssistants, + AssistantsWithRawResponse, + AsyncAssistantsWithRawResponse, + AssistantsWithStreamingResponse, + AsyncAssistantsWithStreamingResponse, +) +from ..._resource import SyncAPIResource, AsyncAPIResource +from .vector_stores import ( + VectorStores, + AsyncVectorStores, + VectorStoresWithRawResponse, + AsyncVectorStoresWithRawResponse, + VectorStoresWithStreamingResponse, + AsyncVectorStoresWithStreamingResponse, +) +from .threads.threads import Threads, AsyncThreads +from .vector_stores.vector_stores import VectorStores, AsyncVectorStores + +__all__ = ["Beta", "AsyncBeta"] + + +class Beta(SyncAPIResource): + @cached_property + def vector_stores(self) -> VectorStores: + return VectorStores(self._client) + + @cached_property + def assistants(self) -> Assistants: + return Assistants(self._client) + + @cached_property + def threads(self) -> Threads: + return Threads(self._client) + + @cached_property + def with_raw_response(self) -> BetaWithRawResponse: + return BetaWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> BetaWithStreamingResponse: + return BetaWithStreamingResponse(self) + + +class AsyncBeta(AsyncAPIResource): + @cached_property + def vector_stores(self) -> AsyncVectorStores: + return AsyncVectorStores(self._client) + + @cached_property + def assistants(self) -> AsyncAssistants: + return AsyncAssistants(self._client) + + @cached_property + def threads(self) -> AsyncThreads: + return AsyncThreads(self._client) + + @cached_property + def with_raw_response(self) -> AsyncBetaWithRawResponse: + return AsyncBetaWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncBetaWithStreamingResponse: + return AsyncBetaWithStreamingResponse(self) + + +class BetaWithRawResponse: + def __init__(self, beta: Beta) -> None: + self._beta = beta + + @cached_property + def vector_stores(self) -> VectorStoresWithRawResponse: + return VectorStoresWithRawResponse(self._beta.vector_stores) + + @cached_property + def assistants(self) -> AssistantsWithRawResponse: + return AssistantsWithRawResponse(self._beta.assistants) + + @cached_property + def threads(self) -> ThreadsWithRawResponse: + return ThreadsWithRawResponse(self._beta.threads) + + +class AsyncBetaWithRawResponse: + def __init__(self, beta: AsyncBeta) -> None: + self._beta = beta + + @cached_property + def vector_stores(self) -> AsyncVectorStoresWithRawResponse: + return AsyncVectorStoresWithRawResponse(self._beta.vector_stores) + + @cached_property + def assistants(self) -> AsyncAssistantsWithRawResponse: + return AsyncAssistantsWithRawResponse(self._beta.assistants) + + @cached_property + def threads(self) -> AsyncThreadsWithRawResponse: + return AsyncThreadsWithRawResponse(self._beta.threads) + + +class BetaWithStreamingResponse: + def __init__(self, beta: Beta) -> None: + self._beta = beta + + @cached_property + def vector_stores(self) -> VectorStoresWithStreamingResponse: + return VectorStoresWithStreamingResponse(self._beta.vector_stores) + + @cached_property + def assistants(self) -> AssistantsWithStreamingResponse: + return AssistantsWithStreamingResponse(self._beta.assistants) + + @cached_property + def threads(self) -> ThreadsWithStreamingResponse: + return 
ThreadsWithStreamingResponse(self._beta.threads) + + +class AsyncBetaWithStreamingResponse: + def __init__(self, beta: AsyncBeta) -> None: + self._beta = beta + + @cached_property + def vector_stores(self) -> AsyncVectorStoresWithStreamingResponse: + return AsyncVectorStoresWithStreamingResponse(self._beta.vector_stores) + + @cached_property + def assistants(self) -> AsyncAssistantsWithStreamingResponse: + return AsyncAssistantsWithStreamingResponse(self._beta.assistants) + + @cached_property + def threads(self) -> AsyncThreadsWithStreamingResponse: + return AsyncThreadsWithStreamingResponse(self._beta.threads) diff --git a/portkey_ai/_vendor/openai/resources/beta/threads/__init__.py b/portkey_ai/_vendor/openai/resources/beta/threads/__init__.py new file mode 100644 index 00000000..a66e445b --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/beta/threads/__init__.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .runs import ( + Runs, + AsyncRuns, + RunsWithRawResponse, + AsyncRunsWithRawResponse, + RunsWithStreamingResponse, + AsyncRunsWithStreamingResponse, +) +from .threads import ( + Threads, + AsyncThreads, + ThreadsWithRawResponse, + AsyncThreadsWithRawResponse, + ThreadsWithStreamingResponse, + AsyncThreadsWithStreamingResponse, +) +from .messages import ( + Messages, + AsyncMessages, + MessagesWithRawResponse, + AsyncMessagesWithRawResponse, + MessagesWithStreamingResponse, + AsyncMessagesWithStreamingResponse, +) + +__all__ = [ + "Runs", + "AsyncRuns", + "RunsWithRawResponse", + "AsyncRunsWithRawResponse", + "RunsWithStreamingResponse", + "AsyncRunsWithStreamingResponse", + "Messages", + "AsyncMessages", + "MessagesWithRawResponse", + "AsyncMessagesWithRawResponse", + "MessagesWithStreamingResponse", + "AsyncMessagesWithStreamingResponse", + "Threads", + "AsyncThreads", + "ThreadsWithRawResponse", + "AsyncThreadsWithRawResponse", + "ThreadsWithStreamingResponse", + "AsyncThreadsWithStreamingResponse", +] diff --git a/portkey_ai/_vendor/openai/resources/beta/threads/messages.py b/portkey_ai/_vendor/openai/resources/beta/threads/messages.py new file mode 100644 index 00000000..4669de7e --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/beta/threads/messages.py @@ -0,0 +1,707 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable, Optional +from typing_extensions import Literal + +import httpx + +from .... 
import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import ( + maybe_transform, + async_maybe_transform, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import ( + to_streamed_response_wrapper, + async_to_streamed_response_wrapper, +) +from ....pagination import SyncCursorPage, AsyncCursorPage +from ...._base_client import ( + AsyncPaginator, + make_request_options, +) +from ....types.beta.threads import ( + message_list_params, + message_create_params, + message_update_params, +) +from ....types.beta.threads.message import Message +from ....types.beta.threads.message_deleted import MessageDeleted + +__all__ = ["Messages", "AsyncMessages"] + + +class Messages(SyncAPIResource): + @cached_property + def with_raw_response(self) -> MessagesWithRawResponse: + return MessagesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> MessagesWithStreamingResponse: + return MessagesWithStreamingResponse(self) + + def create( + self, + thread_id: str, + *, + content: str, + role: Literal["user", "assistant"], + attachments: Optional[Iterable[message_create_params.Attachment]] + | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Message: + """ + Create a message. + + Args: + content: The content of the message. + + role: + The role of the entity that is creating the message. Allowed values include: + + - `user`: Indicates the message is sent by an actual user and should be used in + most cases to represent user-generated messages. + - `assistant`: Indicates the message is generated by the assistant. Use this + value to insert messages from the assistant into the conversation. + + attachments: A list of files attached to the message, and the tools they should be added to. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/threads/{thread_id}/messages", + body=maybe_transform( + { + "content": content, + "role": role, + "attachments": attachments, + "metadata": metadata, + }, + message_create_params.MessageCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Message, + ) + + def retrieve( + self, + message_id: str, + *, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Message: + """ + Retrieve a message. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + if not message_id: + raise ValueError( + f"Expected a non-empty value for `message_id` but received {message_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get( + f"/threads/{thread_id}/messages/{message_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Message, + ) + + def update( + self, + message_id: str, + *, + thread_id: str, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Message: + """ + Modifies a message. + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + if not message_id: + raise ValueError( + f"Expected a non-empty value for `message_id` but received {message_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/threads/{thread_id}/messages/{message_id}", + body=maybe_transform( + {"metadata": metadata}, message_update_params.MessageUpdateParams + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Message, + ) + + def list( + self, + thread_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + run_id: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[Message]: + """ + Returns a list of messages for a given thread. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include before=obj_foo in order to + fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + run_id: Filter messages by the run ID that generated them. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/threads/{thread_id}/messages", + page=SyncCursorPage[Message], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + "run_id": run_id, + }, + message_list_params.MessageListParams, + ), + ), + model=Message, + ) + + def delete( + self, + message_id: str, + *, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> MessageDeleted: + """ + Deletes a message. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + if not message_id: + raise ValueError( + f"Expected a non-empty value for `message_id` but received {message_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._delete( + f"/threads/{thread_id}/messages/{message_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=MessageDeleted, + ) + + +class AsyncMessages(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncMessagesWithRawResponse: + return AsyncMessagesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse: + return AsyncMessagesWithStreamingResponse(self) + + async def create( + self, + thread_id: str, + *, + content: str, + role: Literal["user", "assistant"], + attachments: Optional[Iterable[message_create_params.Attachment]] + | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Message: + """ + Create a message. + + Args: + content: The content of the message. + + role: + The role of the entity that is creating the message. Allowed values include: + + - `user`: Indicates the message is sent by an actual user and should be used in + most cases to represent user-generated messages. 
+ - `assistant`: Indicates the message is generated by the assistant. Use this + value to insert messages from the assistant into the conversation. + + attachments: A list of files attached to the message, and the tools they should be added to. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/threads/{thread_id}/messages", + body=await async_maybe_transform( + { + "content": content, + "role": role, + "attachments": attachments, + "metadata": metadata, + }, + message_create_params.MessageCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Message, + ) + + async def retrieve( + self, + message_id: str, + *, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Message: + """ + Retrieve a message. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + if not message_id: + raise ValueError( + f"Expected a non-empty value for `message_id` but received {message_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/threads/{thread_id}/messages/{message_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Message, + ) + + async def update( + self, + message_id: str, + *, + thread_id: str, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Message: + """ + Modifies a message. + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. 
Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + if not message_id: + raise ValueError( + f"Expected a non-empty value for `message_id` but received {message_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/threads/{thread_id}/messages/{message_id}", + body=await async_maybe_transform( + {"metadata": metadata}, message_update_params.MessageUpdateParams + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Message, + ) + + def list( + self, + thread_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + run_id: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[Message, AsyncCursorPage[Message]]: + """ + Returns a list of messages for a given thread. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include before=obj_foo in order to + fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + run_id: Filter messages by the run ID that generated them. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/threads/{thread_id}/messages", + page=AsyncCursorPage[Message], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + "run_id": run_id, + }, + message_list_params.MessageListParams, + ), + ), + model=Message, + ) + + async def delete( + self, + message_id: str, + *, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> MessageDeleted: + """ + Deletes a message. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + if not message_id: + raise ValueError( + f"Expected a non-empty value for `message_id` but received {message_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._delete( + f"/threads/{thread_id}/messages/{message_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=MessageDeleted, + ) + + +class MessagesWithRawResponse: + def __init__(self, messages: Messages) -> None: + self._messages = messages + + self.create = _legacy_response.to_raw_response_wrapper( + messages.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + messages.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + messages.update, + ) + self.list = _legacy_response.to_raw_response_wrapper( + messages.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + messages.delete, + ) + + +class AsyncMessagesWithRawResponse: + def __init__(self, messages: AsyncMessages) -> None: + self._messages = messages + + self.create = _legacy_response.async_to_raw_response_wrapper( + messages.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + messages.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + messages.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + messages.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + messages.delete, + ) + + +class MessagesWithStreamingResponse: + def __init__(self, messages: Messages) -> None: + self._messages = messages + + self.create = to_streamed_response_wrapper( + messages.create, + ) + self.retrieve = to_streamed_response_wrapper( + 
messages.retrieve, + ) + self.update = to_streamed_response_wrapper( + messages.update, + ) + self.list = to_streamed_response_wrapper( + messages.list, + ) + self.delete = to_streamed_response_wrapper( + messages.delete, + ) + + +class AsyncMessagesWithStreamingResponse: + def __init__(self, messages: AsyncMessages) -> None: + self._messages = messages + + self.create = async_to_streamed_response_wrapper( + messages.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + messages.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + messages.update, + ) + self.list = async_to_streamed_response_wrapper( + messages.list, + ) + self.delete = async_to_streamed_response_wrapper( + messages.delete, + ) diff --git a/portkey_ai/_vendor/openai/resources/beta/threads/runs/__init__.py b/portkey_ai/_vendor/openai/resources/beta/threads/runs/__init__.py new file mode 100644 index 00000000..50aa9fae --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/beta/threads/runs/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .runs import ( + Runs, + AsyncRuns, + RunsWithRawResponse, + AsyncRunsWithRawResponse, + RunsWithStreamingResponse, + AsyncRunsWithStreamingResponse, +) +from .steps import ( + Steps, + AsyncSteps, + StepsWithRawResponse, + AsyncStepsWithRawResponse, + StepsWithStreamingResponse, + AsyncStepsWithStreamingResponse, +) + +__all__ = [ + "Steps", + "AsyncSteps", + "StepsWithRawResponse", + "AsyncStepsWithRawResponse", + "StepsWithStreamingResponse", + "AsyncStepsWithStreamingResponse", + "Runs", + "AsyncRuns", + "RunsWithRawResponse", + "AsyncRunsWithRawResponse", + "RunsWithStreamingResponse", + "AsyncRunsWithStreamingResponse", +] diff --git a/portkey_ai/_vendor/openai/resources/beta/threads/runs/runs.py b/portkey_ai/_vendor/openai/resources/beta/threads/runs/runs.py new file mode 100644 index 00000000..46324e07 --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/beta/threads/runs/runs.py @@ -0,0 +1,3458 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import time +import typing_extensions +from typing import Union, Iterable, Optional, overload +from functools import partial +from typing_extensions import Literal + +import httpx + +from ..... 
import _legacy_response +from .steps import ( + Steps, + AsyncSteps, + StepsWithRawResponse, + AsyncStepsWithRawResponse, + StepsWithStreamingResponse, + AsyncStepsWithStreamingResponse, +) +from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ....._utils import ( + is_given, + required_args, + maybe_transform, + async_maybe_transform, +) +from ....._compat import cached_property +from ....._resource import SyncAPIResource, AsyncAPIResource +from ....._response import ( + to_streamed_response_wrapper, + async_to_streamed_response_wrapper, +) +from ....._streaming import Stream, AsyncStream +from .....pagination import SyncCursorPage, AsyncCursorPage +from ....._base_client import ( + AsyncPaginator, + make_request_options, +) +from .....lib.streaming import ( + AssistantEventHandler, + AssistantEventHandlerT, + AssistantStreamManager, + AsyncAssistantEventHandler, + AsyncAssistantEventHandlerT, + AsyncAssistantStreamManager, +) +from .....types.beta.threads import ( + run_list_params, + run_create_params, + run_update_params, + run_submit_tool_outputs_params, +) +from .....types.beta.threads.run import Run +from .....types.beta.assistant_tool_param import AssistantToolParam +from .....types.beta.assistant_stream_event import AssistantStreamEvent +from .....types.beta.assistant_tool_choice_option_param import ( + AssistantToolChoiceOptionParam, +) +from .....types.beta.assistant_response_format_option_param import ( + AssistantResponseFormatOptionParam, +) + +__all__ = ["Runs", "AsyncRuns"] + + +class Runs(SyncAPIResource): + @cached_property + def steps(self) -> Steps: + return Steps(self._client) + + @cached_property + def with_raw_response(self) -> RunsWithRawResponse: + return RunsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> RunsWithStreamingResponse: + return RunsWithStreamingResponse(self) + + @overload + def create( + self, + thread_id: str, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + Create a run. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. + + additional_messages: Adds additional messages to the thread before creating the run. + + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. 
`none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + thread_id: str, + *, + assistant_id: str, + stream: Literal[True], + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[AssistantStreamEvent]: + """ + Create a run. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. 
+ + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. + + additional_messages: Adds additional messages to the thread before creating the run. + + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. 
+ + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + thread_id: str, + *, + assistant_id: str, + stream: bool, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | Stream[AssistantStreamEvent]: + """ + Create a run. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. + + additional_messages: Adds additional messages to the thread before creating the run. + + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. 
The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["assistant_id"], ["assistant_id", "stream"]) + def create( + self, + thread_id: str, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | Stream[AssistantStreamEvent]: + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/threads/{thread_id}/runs", + body=maybe_transform( + { + "assistant_id": assistant_id, + "additional_instructions": additional_instructions, + "additional_messages": additional_messages, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "stream": stream, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + "truncation_strategy": truncation_strategy, + }, + run_create_params.RunCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Run, + stream=stream or False, + stream_cls=Stream[AssistantStreamEvent], + ) + + def retrieve( + self, + run_id: str, + *, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + Retrieves a run. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + if not run_id: + raise ValueError( + f"Expected a non-empty value for `run_id` but received {run_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get( + f"/threads/{thread_id}/runs/{run_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Run, + ) + + def update( + self, + run_id: str, + *, + thread_id: str, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + Modifies a run. + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + if not run_id: + raise ValueError( + f"Expected a non-empty value for `run_id` but received {run_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/threads/{thread_id}/runs/{run_id}", + body=maybe_transform( + {"metadata": metadata}, run_update_params.RunUpdateParams + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Run, + ) + + def list( + self, + thread_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[Run]: + """ + Returns a list of runs belonging to a thread. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. 
For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include before=obj_foo in order to + fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/threads/{thread_id}/runs", + page=SyncCursorPage[Run], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + run_list_params.RunListParams, + ), + ), + model=Run, + ) + + def cancel( + self, + run_id: str, + *, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + Cancels a run that is `in_progress`. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + if not run_id: + raise ValueError( + f"Expected a non-empty value for `run_id` but received {run_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/threads/{thread_id}/runs/{run_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Run, + ) + + def create_and_poll( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + A helper to create a run an poll for a terminal state. 
More information on Run + lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + run = self.create( + thread_id=thread_id, + assistant_id=assistant_id, + additional_instructions=additional_instructions, + additional_messages=additional_messages, + instructions=instructions, + max_completion_tokens=max_completion_tokens, + max_prompt_tokens=max_prompt_tokens, + metadata=metadata, + model=model, + response_format=response_format, + temperature=temperature, + tool_choice=tool_choice, + # We assume we are not streaming when polling + stream=False, + tools=tools, + truncation_strategy=truncation_strategy, + top_p=top_p, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return self.poll( + run.id, + thread_id=thread_id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + poll_interval_ms=poll_interval_ms, + timeout=timeout, + ) + + @overload + @typing_extensions.deprecated("use `stream` instead") + def create_and_stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandler]: + """Create a Run stream""" + ... 
+ + @overload + @typing_extensions.deprecated("use `stream` instead") + def create_and_stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandlerT]: + """Create a Run stream""" + ... 
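`create_and_stream` is retained only for backwards compatibility (note the `typing_extensions.deprecated` marker pointing at `stream`); the `stream` helper further below is the supported entry point. A minimal event-handler sketch, under the same import assumptions as the previous sketch and assuming the vendored `AssistantEventHandler` keeps the upstream `on_text_delta` hook and `until_done()` method:

from typing_extensions import override

from portkey_ai._vendor.openai import OpenAI  # assumed vendored import path
from portkey_ai._vendor.openai.lib.streaming import AssistantEventHandler

class PrintHandler(AssistantEventHandler):
    @override
    def on_text_delta(self, delta, snapshot) -> None:
        # Print streamed text fragments as they arrive.
        print(delta.value or "", end="", flush=True)

client = OpenAI()
with client.beta.threads.runs.stream(
    thread_id="thread_abc123",     # hypothetical IDs
    assistant_id="asst_abc123",
    event_handler=PrintHandler(),
) as stream:
    stream.until_done()  # consume events until the run reaches a terminal state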
+ + @typing_extensions.deprecated("use `stream` instead") + def create_and_stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ( + AssistantStreamManager[AssistantEventHandler] + | AssistantStreamManager[AssistantEventHandlerT] + ): + """Create a Run stream""" + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + make_request = partial( + self._post, + f"/threads/{thread_id}/runs", + body=maybe_transform( + { + "assistant_id": assistant_id, + "additional_instructions": additional_instructions, + "additional_messages": additional_messages, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, + "tools": tools, + "truncation_strategy": truncation_strategy, + "top_p": top_p, + }, + run_create_params.RunCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Run, + stream=True, + stream_cls=Stream[AssistantStreamEvent], + ) + return AssistantStreamManager( + make_request, event_handler=event_handler or AssistantEventHandler() + ) + + def poll( + self, + run_id: str, + thread_id: str, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | 
NotGiven = NOT_GIVEN, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> Run: + """ + A helper to poll a run status until it reaches a terminal state. More + information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})} + + if is_given(poll_interval_ms): + extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) + + terminal_states = { + "requires_action", + "cancelled", + "completed", + "failed", + "expired", + } + while True: + response = self.with_raw_response.retrieve( + thread_id=thread_id, + run_id=run_id, + extra_headers=extra_headers, + extra_body=extra_body, + extra_query=extra_query, + timeout=timeout, + ) + + run = response.parse() + # Return if we reached a terminal state + if run.status in terminal_states: + return run + + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 + + time.sleep(poll_interval_ms / 1000) + + @overload + def stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandler]: + """Create a Run stream""" + ... 
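The `poll` helper above simply re-issues `retrieve` until the run leaves its in-flight states, honouring the server-suggested `openai-poll-after-ms` header when no explicit `poll_interval_ms` is given. A minimal sketch of combining it with a non-streaming `create`, same assumptions as the earlier sketches:

from portkey_ai._vendor.openai import OpenAI  # assumed vendored import path

client = OpenAI()

run = client.beta.threads.runs.create(
    thread_id="thread_abc123",    # hypothetical IDs
    assistant_id="asst_abc123",
    stream=False,
)
run = client.beta.threads.runs.poll(run.id, thread_id="thread_abc123")

if run.status == "requires_action":
    # Tool calls requested by the assistant would be resolved here and their
    # results returned via submit_tool_outputs / submit_tool_outputs_and_poll.
    pass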
+ + @overload + def stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandlerT]: + """Create a Run stream""" + ... + + def stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ( + AssistantStreamManager[AssistantEventHandler] + | AssistantStreamManager[AssistantEventHandlerT] + ): + """Create a Run stream""" + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + make_request = partial( + self._post, + f"/threads/{thread_id}/runs", + body=maybe_transform( + { + "assistant_id": assistant_id, + "additional_instructions": additional_instructions, + "additional_messages": additional_messages, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, + "tools": tools, + "truncation_strategy": truncation_strategy, + "top_p": top_p, + }, + run_create_params.RunCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Run, + stream=True, + stream_cls=Stream[AssistantStreamEvent], + ) + return AssistantStreamManager( + make_request, event_handler=event_handler or AssistantEventHandler() + ) + + @overload + def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. + + Args: + tool_outputs: A list of tools for which the outputs are being submitted. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + stream: Literal[True], + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[AssistantStreamEvent]: + """ + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. + + Args: + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + tool_outputs: A list of tools for which the outputs are being submitted. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + stream: bool, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | Stream[AssistantStreamEvent]: + """ + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. + + Args: + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + tool_outputs: A list of tools for which the outputs are being submitted. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args( + ["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"] + ) + def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | Stream[AssistantStreamEvent]: + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + if not run_id: + raise ValueError( + f"Expected a non-empty value for `run_id` but received {run_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", + body=maybe_transform( + { + "tool_outputs": tool_outputs, + "stream": stream, + }, + run_submit_tool_outputs_params.RunSubmitToolOutputsParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Run, + stream=stream or False, + stream_cls=Stream[AssistantStreamEvent], + ) + + def submit_tool_outputs_and_poll( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + A helper to submit a tool output to a run and poll for a terminal run state. + More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + run = self.submit_tool_outputs( + run_id=run_id, + thread_id=thread_id, + tool_outputs=tool_outputs, + stream=False, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return self.poll( + run_id=run.id, + thread_id=thread_id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + poll_interval_ms=poll_interval_ms, + ) + + @overload + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandler]: + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + ... + + @overload + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + event_handler: AssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandlerT]: + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + ... + + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + event_handler: AssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ( + AssistantStreamManager[AssistantEventHandler] + | AssistantStreamManager[AssistantEventHandlerT] + ): + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + if not run_id: + raise ValueError( + f"Expected a non-empty value for `run_id` but received {run_id!r}" + ) + + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + request = partial( + self._post, + f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", + body=maybe_transform( + { + "tool_outputs": tool_outputs, + "stream": True, + }, + run_submit_tool_outputs_params.RunSubmitToolOutputsParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Run, + stream=True, + stream_cls=Stream[AssistantStreamEvent], + ) + return AssistantStreamManager( + request, event_handler=event_handler or AssistantEventHandler() + ) + + +class AsyncRuns(AsyncAPIResource): + @cached_property + def steps(self) -> AsyncSteps: + return AsyncSteps(self._client) + + @cached_property + def with_raw_response(self) -> AsyncRunsWithRawResponse: + return AsyncRunsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncRunsWithStreamingResponse: + return AsyncRunsWithStreamingResponse(self) + + @overload + async def create( + self, + thread_id: str, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + 
"gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + Create a run. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. + + additional_messages: Adds additional messages to the thread before creating the run. + + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. 
+ + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
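
As a quick orientation to the surface being vendored here, the sketch below shows a plain (non-streaming) run creation against the async client documented in the overload above. It is illustrative only: the thread and assistant IDs are placeholders, the API key is assumed to come from the environment, and the import uses a stock openai 1.26.x install for brevity (inside this package the same module lives under portkey_ai._vendor.openai).

# Hypothetical usage sketch; IDs below are placeholders, not part of this patch.
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()  # assumes OPENAI_API_KEY is set in the environment
    run = await client.beta.threads.runs.create(
        "thread_abc123",                            # thread_id is positional
        assistant_id="asst_abc123",
        instructions="Reply with a single JSON object.",
        response_format={"type": "json_object"},    # JSON mode, per the docstring above
        temperature=0.2,
        max_completion_tokens=500,
    )
    print(run.id, run.status)


asyncio.run(main())
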
+ + @overload + async def create( + self, + thread_id: str, + *, + assistant_id: str, + stream: Literal[True], + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[AssistantStreamEvent]: + """ + Create a run. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. + + additional_messages: Adds additional messages to the thread before creating the run. + + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. 
This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
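
For the `stream=True` overload above, the awaited call resolves to an AsyncStream[AssistantStreamEvent] that can be iterated directly. A minimal sketch, again with placeholder IDs and the plain openai import standing in for the vendored path:

# Hypothetical usage sketch; IDs are placeholders.
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    stream = await client.beta.threads.runs.create(
        "thread_abc123",
        assistant_id="asst_abc123",
        stream=True,  # selects the AsyncStream[AssistantStreamEvent] overload
    )
    async for event in stream:
        # Each item is a typed server-sent event such as "thread.run.created"
        # or "thread.message.delta"; the stream ends at a terminal run state.
        print(event.event)


asyncio.run(main())
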
+ + @overload + async def create( + self, + thread_id: str, + *, + assistant_id: str, + stream: bool, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | AsyncStream[AssistantStreamEvent]: + """ + Create a run. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. + + additional_messages: Adds additional messages to the thread before creating the run. + + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. 
This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
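
The third overload takes `stream: bool`, which lets a caller thread a runtime flag through one call site and then narrow the resulting Run | AsyncStream union. A sketch under the same placeholder assumptions (the `live_output` flag and the function name are invented for illustration):

# Hypothetical usage sketch; IDs and the `live_output` flag are placeholders.
import asyncio

from openai import AsyncOpenAI, AsyncStream


async def run_thread(live_output: bool) -> None:
    client = AsyncOpenAI()
    result = await client.beta.threads.runs.create(
        "thread_abc123",
        assistant_id="asst_abc123",
        stream=live_output,  # bool overload: Run | AsyncStream[AssistantStreamEvent]
    )
    if isinstance(result, AsyncStream):
        async for event in result:
            print(event.event)
    else:
        print(result.status)


asyncio.run(run_thread(live_output=True))
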
+ + @required_args(["assistant_id"], ["assistant_id", "stream"]) + async def create( + self, + thread_id: str, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | AsyncStream[AssistantStreamEvent]: + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/threads/{thread_id}/runs", + body=await async_maybe_transform( + { + "assistant_id": assistant_id, + "additional_instructions": additional_instructions, + "additional_messages": additional_messages, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "stream": stream, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + "truncation_strategy": truncation_strategy, + }, + run_create_params.RunCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Run, + stream=stream or False, + stream_cls=AsyncStream[AssistantStreamEvent], + ) + + async def retrieve( + self, + run_id: str, + *, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + Retrieves a run. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + if not run_id: + raise ValueError( + f"Expected a non-empty value for `run_id` but received {run_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/threads/{thread_id}/runs/{run_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Run, + ) + + async def update( + self, + run_id: str, + *, + thread_id: str, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + Modifies a run. + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + if not run_id: + raise ValueError( + f"Expected a non-empty value for `run_id` but received {run_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/threads/{thread_id}/runs/{run_id}", + body=await async_maybe_transform( + {"metadata": metadata}, run_update_params.RunUpdateParams + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Run, + ) + + def list( + self, + thread_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[Run, AsyncCursorPage[Run]]: + """ + Returns a list of runs belonging to a thread. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. 
+ + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include before=obj_foo in order to + fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/threads/{thread_id}/runs", + page=AsyncCursorPage[Run], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + run_list_params.RunListParams, + ), + ), + model=Run, + ) + + async def cancel( + self, + run_id: str, + *, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + Cancels a run that is `in_progress`. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + if not run_id: + raise ValueError( + f"Expected a non-empty value for `run_id` but received {run_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/threads/{thread_id}/runs/{run_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Run, + ) + + async def create_and_poll( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + A helper to create a run an poll for a terminal state. 
More information on Run + lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + run = await self.create( + thread_id=thread_id, + assistant_id=assistant_id, + additional_instructions=additional_instructions, + additional_messages=additional_messages, + instructions=instructions, + max_completion_tokens=max_completion_tokens, + max_prompt_tokens=max_prompt_tokens, + metadata=metadata, + model=model, + response_format=response_format, + temperature=temperature, + tool_choice=tool_choice, + # We assume we are not streaming when polling + stream=False, + tools=tools, + truncation_strategy=truncation_strategy, + top_p=top_p, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return await self.poll( + run.id, + thread_id=thread_id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + poll_interval_ms=poll_interval_ms, + timeout=timeout, + ) + + @overload + @typing_extensions.deprecated("use `stream` instead") + def create_and_stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: + """Create a Run stream""" + ... 
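
The `create_and_poll` helper above pairs naturally with `submit_tool_outputs_and_poll` for the `requires_action` flow described earlier in this file. A hedged sketch follows; the IDs and the "42" tool output are placeholders, and real code would dispatch each tool call to an actual function:

# Hypothetical usage sketch; IDs and the tool output value are placeholders.
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    run = await client.beta.threads.runs.create_and_poll(
        thread_id="thread_abc123",
        assistant_id="asst_abc123",
        poll_interval_ms=500,
    )
    if run.status == "requires_action" and run.required_action is not None:
        calls = run.required_action.submit_tool_outputs.tool_calls
        run = await client.beta.threads.runs.submit_tool_outputs_and_poll(
            run_id=run.id,
            thread_id="thread_abc123",
            tool_outputs=[{"tool_call_id": c.id, "output": "42"} for c in calls],
        )
    print(run.status)


asyncio.run(main())
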
+ + @overload + @typing_extensions.deprecated("use `stream` instead") + def create_and_stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: + """Create a Run stream""" + ... 
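
Since `create_and_stream` is marked deprecated here in favour of `stream`, a custom handler would typically be wired up as sketched below. The PrintHandler class is an invented example, and AsyncAssistantEventHandler is assumed to be importable from the package's top level, as in the upstream streaming helpers documentation:

# Hypothetical usage sketch; IDs and the handler class are placeholders.
import asyncio

from openai import AsyncAssistantEventHandler, AsyncOpenAI


class PrintHandler(AsyncAssistantEventHandler):
    async def on_text_delta(self, delta, snapshot) -> None:
        # Print streamed assistant text as it arrives.
        print(delta.value or "", end="", flush=True)


async def main() -> None:
    client = AsyncOpenAI()
    async with client.beta.threads.runs.stream(
        thread_id="thread_abc123",
        assistant_id="asst_abc123",
        event_handler=PrintHandler(),
    ) as stream:
        await stream.until_done()


asyncio.run(main())
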
+ + @typing_extensions.deprecated("use `stream` instead") + def create_and_stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ( + AsyncAssistantStreamManager[AsyncAssistantEventHandler] + | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] + ): + """Create a Run stream""" + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + request = self._post( + f"/threads/{thread_id}/runs", + body=maybe_transform( + { + "assistant_id": assistant_id, + "additional_instructions": additional_instructions, + "additional_messages": additional_messages, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, + "tools": tools, + "truncation_strategy": truncation_strategy, + "top_p": top_p, + }, + run_create_params.RunCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Run, + stream=True, + stream_cls=AsyncStream[AssistantStreamEvent], + ) + return AsyncAssistantStreamManager( + request, event_handler=event_handler or AsyncAssistantEventHandler() + ) + + async def poll( + self, + run_id: str, + thread_id: str, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float 
| httpx.Timeout | None | NotGiven = NOT_GIVEN, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> Run: + """ + A helper to poll a run status until it reaches a terminal state. More + information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})} + + if is_given(poll_interval_ms): + extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) + + terminal_states = { + "requires_action", + "cancelled", + "completed", + "failed", + "expired", + } + while True: + response = await self.with_raw_response.retrieve( + thread_id=thread_id, + run_id=run_id, + extra_headers=extra_headers, + extra_body=extra_body, + extra_query=extra_query, + timeout=timeout, + ) + + run = response.parse() + # Return if we reached a terminal state + if run.status in terminal_states: + return run + + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 + + time.sleep(poll_interval_ms / 1000) + + @overload + def stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: + """Create a Run stream""" + ... 
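
The `poll` helper implemented above simply re-fetches the run until it reaches one of the listed terminal states, honouring the server's `openai-poll-after-ms` hint (or a 1s default) when no `poll_interval_ms` is given. A sketch with the usual placeholder IDs:

# Hypothetical usage sketch; IDs are placeholders.
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    run = await client.beta.threads.runs.create(
        "thread_abc123",
        assistant_id="asst_abc123",
        stream=False,
    )
    # Re-fetches the run until it is requires_action, cancelled, completed,
    # failed, or expired; the wait between retrievals follows the server hint.
    run = await client.beta.threads.runs.poll(run.id, thread_id="thread_abc123")
    print(run.status)


asyncio.run(main())
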
+ + @overload + def stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: + """Create a Run stream""" + ... + + def stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] + | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ( + AsyncAssistantStreamManager[AsyncAssistantEventHandler] + | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] + ): + """Create a Run stream""" + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + request = self._post( + f"/threads/{thread_id}/runs", + body=maybe_transform( + { + "assistant_id": assistant_id, + "additional_instructions": additional_instructions, + "additional_messages": additional_messages, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, + "tools": tools, + "truncation_strategy": truncation_strategy, + "top_p": top_p, + }, + run_create_params.RunCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Run, + stream=True, + stream_cls=AsyncStream[AssistantStreamEvent], + ) + return AsyncAssistantStreamManager( + request, event_handler=event_handler or AsyncAssistantEventHandler() + ) + + @overload + async def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. + + Args: + tool_outputs: A list of tools for which the outputs are being submitted. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + stream: Literal[True], + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[AssistantStreamEvent]: + """ + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. + + Args: + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + tool_outputs: A list of tools for which the outputs are being submitted. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + stream: bool, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | AsyncStream[AssistantStreamEvent]: + """ + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. + + Args: + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + tool_outputs: A list of tools for which the outputs are being submitted. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args( + ["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"] + ) + async def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | AsyncStream[AssistantStreamEvent]: + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + if not run_id: + raise ValueError( + f"Expected a non-empty value for `run_id` but received {run_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", + body=await async_maybe_transform( + { + "tool_outputs": tool_outputs, + "stream": stream, + }, + run_submit_tool_outputs_params.RunSubmitToolOutputsParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Run, + stream=stream or False, + stream_cls=AsyncStream[AssistantStreamEvent], + ) + + async def submit_tool_outputs_and_poll( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + A helper to submit a tool output to a run and poll for a terminal run state. + More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + run = await self.submit_tool_outputs( + run_id=run_id, + thread_id=thread_id, + tool_outputs=tool_outputs, + stream=False, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return await self.poll( + run_id=run.id, + thread_id=thread_id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + poll_interval_ms=poll_interval_ms, + ) + + @overload + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + ... + + @overload + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + ... + + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ( + AsyncAssistantStreamManager[AsyncAssistantEventHandler] + | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] + ): + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + if not run_id: + raise ValueError( + f"Expected a non-empty value for `run_id` but received {run_id!r}" + ) + + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + request = self._post( + f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", + body=maybe_transform( + { + "tool_outputs": tool_outputs, + "stream": True, + }, + run_submit_tool_outputs_params.RunSubmitToolOutputsParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Run, + stream=True, + stream_cls=AsyncStream[AssistantStreamEvent], + ) + return AsyncAssistantStreamManager( + request, event_handler=event_handler or AsyncAssistantEventHandler() + ) + + +class RunsWithRawResponse: + def __init__(self, runs: Runs) -> None: + self._runs = runs + + self.create = _legacy_response.to_raw_response_wrapper( + runs.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + runs.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + runs.update, + ) + self.list = _legacy_response.to_raw_response_wrapper( + runs.list, + ) + self.cancel = _legacy_response.to_raw_response_wrapper( + runs.cancel, + ) + self.submit_tool_outputs = _legacy_response.to_raw_response_wrapper( + runs.submit_tool_outputs, + ) + + @cached_property + def steps(self) -> StepsWithRawResponse: + return StepsWithRawResponse(self._runs.steps) + + +class AsyncRunsWithRawResponse: + def __init__(self, runs: AsyncRuns) -> None: + self._runs = runs + + self.create = _legacy_response.async_to_raw_response_wrapper( + runs.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + 
runs.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + runs.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + runs.list, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + runs.cancel, + ) + self.submit_tool_outputs = _legacy_response.async_to_raw_response_wrapper( + runs.submit_tool_outputs, + ) + + @cached_property + def steps(self) -> AsyncStepsWithRawResponse: + return AsyncStepsWithRawResponse(self._runs.steps) + + +class RunsWithStreamingResponse: + def __init__(self, runs: Runs) -> None: + self._runs = runs + + self.create = to_streamed_response_wrapper( + runs.create, + ) + self.retrieve = to_streamed_response_wrapper( + runs.retrieve, + ) + self.update = to_streamed_response_wrapper( + runs.update, + ) + self.list = to_streamed_response_wrapper( + runs.list, + ) + self.cancel = to_streamed_response_wrapper( + runs.cancel, + ) + self.submit_tool_outputs = to_streamed_response_wrapper( + runs.submit_tool_outputs, + ) + + @cached_property + def steps(self) -> StepsWithStreamingResponse: + return StepsWithStreamingResponse(self._runs.steps) + + +class AsyncRunsWithStreamingResponse: + def __init__(self, runs: AsyncRuns) -> None: + self._runs = runs + + self.create = async_to_streamed_response_wrapper( + runs.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + runs.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + runs.update, + ) + self.list = async_to_streamed_response_wrapper( + runs.list, + ) + self.cancel = async_to_streamed_response_wrapper( + runs.cancel, + ) + self.submit_tool_outputs = async_to_streamed_response_wrapper( + runs.submit_tool_outputs, + ) + + @cached_property + def steps(self) -> AsyncStepsWithStreamingResponse: + return AsyncStepsWithStreamingResponse(self._runs.steps) diff --git a/portkey_ai/_vendor/openai/resources/beta/threads/runs/steps.py b/portkey_ai/_vendor/openai/resources/beta/threads/runs/steps.py new file mode 100644 index 00000000..1a1d530c --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/beta/threads/runs/steps.py @@ -0,0 +1,340 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal + +import httpx + +from ..... import _legacy_response +from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ....._utils import maybe_transform +from ....._compat import cached_property +from ....._resource import SyncAPIResource, AsyncAPIResource +from ....._response import ( + to_streamed_response_wrapper, + async_to_streamed_response_wrapper, +) +from .....pagination import SyncCursorPage, AsyncCursorPage +from ....._base_client import ( + AsyncPaginator, + make_request_options, +) +from .....types.beta.threads.runs import step_list_params +from .....types.beta.threads.runs.run_step import RunStep + +__all__ = ["Steps", "AsyncSteps"] + + +class Steps(SyncAPIResource): + @cached_property + def with_raw_response(self) -> StepsWithRawResponse: + return StepsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> StepsWithStreamingResponse: + return StepsWithStreamingResponse(self) + + def retrieve( + self, + step_id: str, + *, + thread_id: str, + run_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> RunStep: + """ + Retrieves a run step. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + if not run_id: + raise ValueError( + f"Expected a non-empty value for `run_id` but received {run_id!r}" + ) + if not step_id: + raise ValueError( + f"Expected a non-empty value for `step_id` but received {step_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get( + f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=RunStep, + ) + + def list( + self, + run_id: str, + *, + thread_id: str, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[RunStep]: + """ + Returns a list of run steps belonging to a run. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include before=obj_foo in order to + fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + if not run_id: + raise ValueError( + f"Expected a non-empty value for `run_id` but received {run_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/threads/{thread_id}/runs/{run_id}/steps", + page=SyncCursorPage[RunStep], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + step_list_params.StepListParams, + ), + ), + model=RunStep, + ) + + +class AsyncSteps(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncStepsWithRawResponse: + return AsyncStepsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncStepsWithStreamingResponse: + return AsyncStepsWithStreamingResponse(self) + + async def retrieve( + self, + step_id: str, + *, + thread_id: str, + run_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> RunStep: + """ + Retrieves a run step. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + if not run_id: + raise ValueError( + f"Expected a non-empty value for `run_id` but received {run_id!r}" + ) + if not step_id: + raise ValueError( + f"Expected a non-empty value for `step_id` but received {step_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=RunStep, + ) + + def list( + self, + run_id: str, + *, + thread_id: str, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[RunStep, AsyncCursorPage[RunStep]]: + """ + Returns a list of run steps belonging to a run. + + Args: + after: A cursor for use in pagination. 
`after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include before=obj_foo in order to + fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + if not run_id: + raise ValueError( + f"Expected a non-empty value for `run_id` but received {run_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/threads/{thread_id}/runs/{run_id}/steps", + page=AsyncCursorPage[RunStep], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + step_list_params.StepListParams, + ), + ), + model=RunStep, + ) + + +class StepsWithRawResponse: + def __init__(self, steps: Steps) -> None: + self._steps = steps + + self.retrieve = _legacy_response.to_raw_response_wrapper( + steps.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + steps.list, + ) + + +class AsyncStepsWithRawResponse: + def __init__(self, steps: AsyncSteps) -> None: + self._steps = steps + + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + steps.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + steps.list, + ) + + +class StepsWithStreamingResponse: + def __init__(self, steps: Steps) -> None: + self._steps = steps + + self.retrieve = to_streamed_response_wrapper( + steps.retrieve, + ) + self.list = to_streamed_response_wrapper( + steps.list, + ) + + +class AsyncStepsWithStreamingResponse: + def __init__(self, steps: AsyncSteps) -> None: + self._steps = steps + + self.retrieve = async_to_streamed_response_wrapper( + steps.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + steps.list, + ) diff --git a/portkey_ai/_vendor/openai/resources/beta/threads/threads.py b/portkey_ai/_vendor/openai/resources/beta/threads/threads.py new file mode 100644 index 00000000..7516ec38 --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/beta/threads/threads.py @@ -0,0 +1,2274 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable, Optional, overload +from functools import partial +from typing_extensions import Literal + +import httpx + +from .... 
import _legacy_response +from .runs import ( + Runs, + AsyncRuns, + RunsWithRawResponse, + AsyncRunsWithRawResponse, + RunsWithStreamingResponse, + AsyncRunsWithStreamingResponse, +) +from .messages import ( + Messages, + AsyncMessages, + MessagesWithRawResponse, + AsyncMessagesWithRawResponse, + MessagesWithStreamingResponse, + AsyncMessagesWithStreamingResponse, +) +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import ( + required_args, + maybe_transform, + async_maybe_transform, +) +from .runs.runs import Runs, AsyncRuns +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import ( + to_streamed_response_wrapper, + async_to_streamed_response_wrapper, +) +from ...._streaming import Stream, AsyncStream +from ....types.beta import ( + thread_create_params, + thread_update_params, + thread_create_and_run_params, +) +from ...._base_client import ( + make_request_options, +) +from ....lib.streaming import ( + AssistantEventHandler, + AssistantEventHandlerT, + AssistantStreamManager, + AsyncAssistantEventHandler, + AsyncAssistantEventHandlerT, + AsyncAssistantStreamManager, +) +from ....types.beta.thread import Thread +from ....types.beta.threads.run import Run +from ....types.beta.thread_deleted import ThreadDeleted +from ....types.beta.assistant_stream_event import AssistantStreamEvent +from ....types.beta.assistant_tool_choice_option_param import ( + AssistantToolChoiceOptionParam, +) +from ....types.beta.assistant_response_format_option_param import ( + AssistantResponseFormatOptionParam, +) + +__all__ = ["Threads", "AsyncThreads"] + + +class Threads(SyncAPIResource): + @cached_property + def runs(self) -> Runs: + return Runs(self._client) + + @cached_property + def messages(self) -> Messages: + return Messages(self._client) + + @cached_property + def with_raw_response(self) -> ThreadsWithRawResponse: + return ThreadsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ThreadsWithStreamingResponse: + return ThreadsWithStreamingResponse(self) + + def create( + self, + *, + messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_params.ToolResources] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Thread: + """ + Create a thread. + + Args: + messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages) to + start the thread with. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + tool_resources: A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + "/threads", + body=maybe_transform( + { + "messages": messages, + "metadata": metadata, + "tool_resources": tool_resources, + }, + thread_create_params.ThreadCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Thread, + ) + + def retrieve( + self, + thread_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Thread: + """ + Retrieves a thread. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get( + f"/threads/{thread_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Thread, + ) + + def update( + self, + thread_id: str, + *, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_update_params.ToolResources] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Thread: + """ + Modifies a thread. + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + tool_resources: A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/threads/{thread_id}", + body=maybe_transform( + { + "metadata": metadata, + "tool_resources": tool_resources, + }, + thread_update_params.ThreadUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Thread, + ) + + def delete( + self, + thread_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ThreadDeleted: + """ + Delete a thread. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._delete( + f"/threads/{thread_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=ThreadDeleted, + ) + + @overload + def create_and_run( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] + | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] + | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + Create a thread and run it in one request. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + instructions: Override the default system message of the assistant. This is useful for + modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + thread: If no thread is provided, an empty thread will be created. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. 
`required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create_and_run( + self, + *, + assistant_id: str, + stream: Literal[True], + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] + | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] + | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[AssistantStreamEvent]: + """ + Create a thread and run it in one request. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. 
+ + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + instructions: Override the default system message of the assistant. This is useful for + modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + thread: If no thread is provided, an empty thread will be created. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. 
+ + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create_and_run( + self, + *, + assistant_id: str, + stream: bool, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] + | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] + | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | Stream[AssistantStreamEvent]: + """ + Create a thread and run it in one request. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + instructions: Override the default system message of the assistant. This is useful for + modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. 
If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + thread: If no thread is provided, an empty thread will be created. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. 
Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["assistant_id"], ["assistant_id", "stream"]) + def create_and_run( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] + | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] + | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | Stream[AssistantStreamEvent]: + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + "/threads/runs", + body=maybe_transform( + { + "assistant_id": assistant_id, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "stream": stream, + "temperature": temperature, + "thread": thread, + "tool_choice": tool_choice, + "tool_resources": tool_resources, + "tools": tools, + "top_p": top_p, + "truncation_strategy": truncation_strategy, + }, + thread_create_and_run_params.ThreadCreateAndRunParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Run, + stream=stream or False, + stream_cls=Stream[AssistantStreamEvent], + ) + + def create_and_run_poll( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] + | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] + | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + A helper to create a thread, start a run and then poll for a terminal state. 
+ More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + run = self.create_and_run( + assistant_id=assistant_id, + instructions=instructions, + max_completion_tokens=max_completion_tokens, + max_prompt_tokens=max_prompt_tokens, + metadata=metadata, + model=model, + response_format=response_format, + temperature=temperature, + stream=False, + thread=thread, + tool_resources=tool_resources, + tool_choice=tool_choice, + truncation_strategy=truncation_strategy, + top_p=top_p, + tools=tools, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return self.runs.poll( + run.id, + run.thread_id, + extra_headers, + extra_query, + extra_body, + timeout, + poll_interval_ms, + ) + + @overload + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] + | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] + | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandler]: + """Create a thread and stream the run back""" + ... 
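+
+ # A minimal usage sketch for the streaming helper implemented below; it assumes
+ # a configured sync client exposed as `client` and a hypothetical assistant ID
+ # ("asst_123"), and relies on the `AssistantEventHandler` helpers imported above:
+ #
+ #     class Handler(AssistantEventHandler):
+ #         def on_text_delta(self, delta, snapshot):
+ #             print(delta.value, end="", flush=True)
+ #
+ #     with client.beta.threads.create_and_run_stream(
+ #         assistant_id="asst_123",
+ #         thread={"messages": [{"role": "user", "content": "Hello"}]},
+ #         event_handler=Handler(),
+ #     ) as stream:
+ #         stream.until_done()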
+ + @overload + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] + | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] + | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + event_handler: AssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandlerT]: + """Create a thread and stream the run back""" + ... + + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] + | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] + | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + event_handler: AssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ( + AssistantStreamManager[AssistantEventHandler] + | AssistantStreamManager[AssistantEventHandlerT] + ): + """Create a thread and stream the run back""" + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.create_and_run_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + make_request = partial( + self._post, + "/threads/runs", + body=maybe_transform( + { + "assistant_id": assistant_id, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, + "thread": thread, + "tools": tools, + "tool_resources": tool_resources, + "truncation_strategy": truncation_strategy, + "top_p": top_p, + }, + thread_create_and_run_params.ThreadCreateAndRunParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Run, + stream=True, + stream_cls=Stream[AssistantStreamEvent], + ) + return AssistantStreamManager( + make_request, event_handler=event_handler or AssistantEventHandler() + ) + + +class AsyncThreads(AsyncAPIResource): + @cached_property + def runs(self) -> AsyncRuns: + return AsyncRuns(self._client) + + @cached_property + def messages(self) -> AsyncMessages: + return AsyncMessages(self._client) + + @cached_property + def with_raw_response(self) -> AsyncThreadsWithRawResponse: + return AsyncThreadsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncThreadsWithStreamingResponse: + return AsyncThreadsWithStreamingResponse(self) + + async def create( + self, + *, + messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_params.ToolResources] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Thread: + """ + Create a thread. + + Args: + messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages) to + start the thread with. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maximum of 512 + characters long. + + tool_resources: A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs.
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + "/threads", + body=await async_maybe_transform( + { + "messages": messages, + "metadata": metadata, + "tool_resources": tool_resources, + }, + thread_create_params.ThreadCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Thread, + ) + + async def retrieve( + self, + thread_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Thread: + """ + Retrieves a thread. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/threads/{thread_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Thread, + ) + + async def update( + self, + thread_id: str, + *, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_update_params.ToolResources] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Thread: + """ + Modifies a thread. + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + tool_resources: A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/threads/{thread_id}", + body=await async_maybe_transform( + { + "metadata": metadata, + "tool_resources": tool_resources, + }, + thread_update_params.ThreadUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Thread, + ) + + async def delete( + self, + thread_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ThreadDeleted: + """ + Delete a thread. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError( + f"Expected a non-empty value for `thread_id` but received {thread_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._delete( + f"/threads/{thread_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=ThreadDeleted, + ) + + @overload + async def create_and_run( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] + | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] + | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that 
aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + Create a thread and run it in one request. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + instructions: Override the default system message of the assistant. This is useful for + modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + thread: If no thread is provided, an empty thread will be created. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. 
`required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create_and_run( + self, + *, + assistant_id: str, + stream: Literal[True], + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] + | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] + | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[AssistantStreamEvent]: + """ + Create a thread and run it in one request. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. 
+ + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + instructions: Override the default system message of the assistant. This is useful for + modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + thread: If no thread is provided, an empty thread will be created. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. 
+ + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create_and_run( + self, + *, + assistant_id: str, + stream: bool, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] + | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] + | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | AsyncStream[AssistantStreamEvent]: + """ + Create a thread and run it in one request. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + instructions: Override the default system message of the assistant. This is useful for + modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. 
If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + thread: If no thread is provided, an empty thread will be created. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. 
Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["assistant_id"], ["assistant_id", "stream"]) + async def create_and_run( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] + | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] + | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | AsyncStream[AssistantStreamEvent]: + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + "/threads/runs", + body=await async_maybe_transform( + { + "assistant_id": assistant_id, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "stream": stream, + "temperature": temperature, + "thread": thread, + "tool_choice": tool_choice, + "tool_resources": tool_resources, + "tools": tools, + "top_p": top_p, + "truncation_strategy": truncation_strategy, + }, + thread_create_and_run_params.ThreadCreateAndRunParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Run, + stream=stream or False, + stream_cls=AsyncStream[AssistantStreamEvent], + ) + + async def create_and_run_poll( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] + | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] + | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + A helper to create a thread, start a run and then poll for a terminal state. 
+ More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + run = await self.create_and_run( + assistant_id=assistant_id, + instructions=instructions, + max_completion_tokens=max_completion_tokens, + max_prompt_tokens=max_prompt_tokens, + metadata=metadata, + model=model, + response_format=response_format, + temperature=temperature, + stream=False, + thread=thread, + tool_resources=tool_resources, + tool_choice=tool_choice, + truncation_strategy=truncation_strategy, + top_p=top_p, + tools=tools, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return await self.runs.poll( + run.id, + run.thread_id, + extra_headers, + extra_query, + extra_body, + timeout, + poll_interval_ms, + ) + + @overload + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] + | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] + | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: + """Create a thread and stream the run back""" + ... 
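The asynchronous streaming helper mirrors the synchronous one: it returns an `AsyncAssistantStreamManager` that drives an event handler over the server-sent events. A minimal sketch of how these overloads are typically consumed, assuming an `AsyncOpenAI`-style client, the SDK's standard `AsyncAssistantEventHandler` hooks, and a placeholder assistant ID:

```python
import asyncio

from openai import AsyncOpenAI, AsyncAssistantEventHandler  # assumption: usual top-level exports


class PrintHandler(AsyncAssistantEventHandler):
    async def on_text_delta(self, delta, snapshot) -> None:
        # Print streamed assistant text as it arrives.
        print(delta.value or "", end="", flush=True)


async def main() -> None:
    client = AsyncOpenAI()
    # The stream manager is an async context manager; leaving the block closes the SSE connection.
    async with client.beta.threads.create_and_run_stream(
        assistant_id="asst_placeholder",  # hypothetical assistant ID
        thread={"messages": [{"role": "user", "content": "Write a haiku about retries."}]},
        event_handler=PrintHandler(),
    ) as stream:
        await stream.until_done()


asyncio.run(main())
```

When no `event_handler` is passed, the manager falls back to a default `AsyncAssistantEventHandler`, matching the non-overloaded implementation below.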
+ + @overload + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] + | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] + | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + event_handler: AsyncAssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: + """Create a thread and stream the run back""" + ... + + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] + | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] + | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] + | NotGiven = NOT_GIVEN, + event_handler: AsyncAssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ( + AsyncAssistantStreamManager[AsyncAssistantEventHandler] + | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] + ): + """Create a thread and stream the run back""" + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.create_and_run_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + request = self._post( + "/threads/runs", + body=maybe_transform( + { + "assistant_id": assistant_id, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, + "thread": thread, + "tools": tools, + "tool_resources": tool_resources, + "truncation_strategy": truncation_strategy, + "top_p": top_p, + }, + thread_create_and_run_params.ThreadCreateAndRunParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Run, + stream=True, + stream_cls=AsyncStream[AssistantStreamEvent], + ) + return AsyncAssistantStreamManager( + request, event_handler=event_handler or AsyncAssistantEventHandler() + ) + + +class ThreadsWithRawResponse: + def __init__(self, threads: Threads) -> None: + self._threads = threads + + self.create = _legacy_response.to_raw_response_wrapper( + threads.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + threads.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + threads.update, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + threads.delete, + ) + self.create_and_run = _legacy_response.to_raw_response_wrapper( + threads.create_and_run, + ) + + @cached_property + def runs(self) -> RunsWithRawResponse: + return RunsWithRawResponse(self._threads.runs) + + @cached_property + def messages(self) -> MessagesWithRawResponse: + return MessagesWithRawResponse(self._threads.messages) + + +class AsyncThreadsWithRawResponse: + def __init__(self, threads: AsyncThreads) -> None: + self._threads = threads + + self.create = _legacy_response.async_to_raw_response_wrapper( + threads.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + threads.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + threads.update, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + threads.delete, + ) + self.create_and_run = _legacy_response.async_to_raw_response_wrapper( + threads.create_and_run, + ) + + @cached_property + def runs(self) -> AsyncRunsWithRawResponse: + return AsyncRunsWithRawResponse(self._threads.runs) + + @cached_property + def messages(self) -> AsyncMessagesWithRawResponse: + return AsyncMessagesWithRawResponse(self._threads.messages) + + +class ThreadsWithStreamingResponse: + def __init__(self, threads: Threads) -> None: + self._threads = threads + + self.create = to_streamed_response_wrapper( + threads.create, + ) + self.retrieve = to_streamed_response_wrapper( + threads.retrieve, + ) + self.update = to_streamed_response_wrapper( + threads.update, + ) + self.delete =
to_streamed_response_wrapper( + threads.delete, + ) + self.create_and_run = to_streamed_response_wrapper( + threads.create_and_run, + ) + + @cached_property + def runs(self) -> RunsWithStreamingResponse: + return RunsWithStreamingResponse(self._threads.runs) + + @cached_property + def messages(self) -> MessagesWithStreamingResponse: + return MessagesWithStreamingResponse(self._threads.messages) + + +class AsyncThreadsWithStreamingResponse: + def __init__(self, threads: AsyncThreads) -> None: + self._threads = threads + + self.create = async_to_streamed_response_wrapper( + threads.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + threads.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + threads.update, + ) + self.delete = async_to_streamed_response_wrapper( + threads.delete, + ) + self.create_and_run = async_to_streamed_response_wrapper( + threads.create_and_run, + ) + + @cached_property + def runs(self) -> AsyncRunsWithStreamingResponse: + return AsyncRunsWithStreamingResponse(self._threads.runs) + + @cached_property + def messages(self) -> AsyncMessagesWithStreamingResponse: + return AsyncMessagesWithStreamingResponse(self._threads.messages) diff --git a/portkey_ai/_vendor/openai/resources/beta/vector_stores/__init__.py b/portkey_ai/_vendor/openai/resources/beta/vector_stores/__init__.py new file mode 100644 index 00000000..96ae16c3 --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/beta/vector_stores/__init__.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .files import ( + Files, + AsyncFiles, + FilesWithRawResponse, + AsyncFilesWithRawResponse, + FilesWithStreamingResponse, + AsyncFilesWithStreamingResponse, +) +from .file_batches import ( + FileBatches, + AsyncFileBatches, + FileBatchesWithRawResponse, + AsyncFileBatchesWithRawResponse, + FileBatchesWithStreamingResponse, + AsyncFileBatchesWithStreamingResponse, +) +from .vector_stores import ( + VectorStores, + AsyncVectorStores, + VectorStoresWithRawResponse, + AsyncVectorStoresWithRawResponse, + VectorStoresWithStreamingResponse, + AsyncVectorStoresWithStreamingResponse, +) + +__all__ = [ + "Files", + "AsyncFiles", + "FilesWithRawResponse", + "AsyncFilesWithRawResponse", + "FilesWithStreamingResponse", + "AsyncFilesWithStreamingResponse", + "FileBatches", + "AsyncFileBatches", + "FileBatchesWithRawResponse", + "AsyncFileBatchesWithRawResponse", + "FileBatchesWithStreamingResponse", + "AsyncFileBatchesWithStreamingResponse", + "VectorStores", + "AsyncVectorStores", + "VectorStoresWithRawResponse", + "AsyncVectorStoresWithRawResponse", + "VectorStoresWithStreamingResponse", + "AsyncVectorStoresWithStreamingResponse", +] diff --git a/portkey_ai/_vendor/openai/resources/beta/vector_stores/file_batches.py b/portkey_ai/_vendor/openai/resources/beta/vector_stores/file_batches.py new file mode 100644 index 00000000..57992579 --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/beta/vector_stores/file_batches.py @@ -0,0 +1,800 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import asyncio +from typing import List, Iterable +from typing_extensions import Literal +from concurrent.futures import Future, ThreadPoolExecutor, as_completed + +import httpx +import sniffio + +from .... 
import _legacy_response +from ....types import FileObject +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes +from ...._utils import ( + is_given, + maybe_transform, + async_maybe_transform, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import ( + to_streamed_response_wrapper, + async_to_streamed_response_wrapper, +) +from ....pagination import SyncCursorPage, AsyncCursorPage +from ...._base_client import ( + AsyncPaginator, + make_request_options, +) +from ....types.beta.vector_stores import ( + file_batch_create_params, + file_batch_list_files_params, +) +from ....types.beta.vector_stores.vector_store_file import VectorStoreFile +from ....types.beta.vector_stores.vector_store_file_batch import VectorStoreFileBatch + +__all__ = ["FileBatches", "AsyncFileBatches"] + + +class FileBatches(SyncAPIResource): + @cached_property + def with_raw_response(self) -> FileBatchesWithRawResponse: + return FileBatchesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> FileBatchesWithStreamingResponse: + return FileBatchesWithStreamingResponse(self) + + def create( + self, + vector_store_id: str, + *, + file_ids: List[str], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """ + Create a vector store file batch. + + Args: + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/vector_stores/{vector_store_id}/file_batches", + body=maybe_transform( + {"file_ids": file_ids}, file_batch_create_params.FileBatchCreateParams + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=VectorStoreFileBatch, + ) + + def retrieve( + self, + batch_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """ + Retrieves a vector store file batch. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + if not batch_id: + raise ValueError( + f"Expected a non-empty value for `batch_id` but received {batch_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=VectorStoreFileBatch, + ) + + def cancel( + self, + batch_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Cancel a vector store file batch. + + This attempts to cancel the processing of + files in this batch as soon as possible. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + if not batch_id: + raise ValueError( + f"Expected a non-empty value for `batch_id` but received {batch_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=VectorStoreFileBatch, + ) + + def create_and_poll( + self, + vector_store_id: str, + *, + file_ids: List[str], + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Create a vector store batch and poll until all files have been processed.""" + batch = self.create( + vector_store_id=vector_store_id, + file_ids=file_ids, + ) + # TODO: don't poll unless necessary?? + return self.poll( + batch.id, + vector_store_id=vector_store_id, + poll_interval_ms=poll_interval_ms, + ) + + def list_files( + self, + batch_id: str, + *, + vector_store_id: str, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + filter: Literal["in_progress", "completed", "failed", "cancelled"] + | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[VectorStoreFile]: + """ + Returns a list of vector store files in a batch. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include before=obj_foo in order to + fetch the previous page of the list. + + filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + if not batch_id: + raise ValueError( + f"Expected a non-empty value for `batch_id` but received {batch_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/files", + page=SyncCursorPage[VectorStoreFile], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "filter": filter, + "limit": limit, + "order": order, + }, + file_batch_list_files_params.FileBatchListFilesParams, + ), + ), + model=VectorStoreFile, + ) + + def poll( + self, + batch_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Wait for the given file batch to be processed. + + Note: this will return even if one of the files failed to process, you need to + check batch.file_counts.failed_count to handle this case. 
+        """
+        headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"}
+        if is_given(poll_interval_ms):
+            headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
+
+        while True:
+            response = self.with_raw_response.retrieve(
+                batch_id,
+                vector_store_id=vector_store_id,
+                extra_headers=headers,
+            )
+
+            batch = response.parse()
+            if batch.file_counts.in_progress > 0:
+                if not is_given(poll_interval_ms):
+                    from_header = response.headers.get("openai-poll-after-ms")
+                    if from_header is not None:
+                        poll_interval_ms = int(from_header)
+                    else:
+                        poll_interval_ms = 1000
+
+                self._sleep(poll_interval_ms / 1000)
+                continue
+
+            return batch
+
+    def upload_and_poll(
+        self,
+        vector_store_id: str,
+        *,
+        files: Iterable[FileTypes],
+        max_concurrency: int = 5,
+        file_ids: List[str] = [],
+        poll_interval_ms: int | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """Uploads the given files concurrently and then creates a vector store file batch.
+
+        If you've already uploaded certain files that you want to include in this batch
+        then you can pass their IDs through the `file_ids` argument.
+
+        By default, if any file upload fails then an exception will be eagerly raised.
+
+        The number of concurrent uploads is configurable using the `max_concurrency`
+        parameter.
+
+        Note: this synchronous helper uploads the files using a thread pool; the
+        `asyncio`/`trio` requirement applies only to the async counterpart of this
+        method.
+        """
+        results: list[FileObject] = []
+
+        with ThreadPoolExecutor(max_workers=max_concurrency) as executor:
+            futures: list[Future[FileObject]] = [
+                executor.submit(
+                    self._client.files.create,
+                    file=file,
+                    purpose="assistants",
+                )
+                for file in files
+            ]
+
+            for future in as_completed(futures):
+                exc = future.exception()
+                if exc:
+                    raise exc
+
+                results.append(future.result())
+
+        batch = self.create_and_poll(
+            vector_store_id=vector_store_id,
+            file_ids=[*file_ids, *(f.id for f in results)],
+            poll_interval_ms=poll_interval_ms,
+        )
+        return batch
+
+
+class AsyncFileBatches(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncFileBatchesWithRawResponse:
+        return AsyncFileBatchesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncFileBatchesWithStreamingResponse:
+        return AsyncFileBatchesWithStreamingResponse(self)
+
+    async def create(
+        self,
+        vector_store_id: str,
+        *,
+        file_ids: List[str],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> VectorStoreFileBatch:
+        """
+        Create a vector store file batch.
+
+        Args:
+          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
+              the vector store should use. Useful for tools like `file_search` that can access
+              files.
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/vector_stores/{vector_store_id}/file_batches", + body=await async_maybe_transform( + {"file_ids": file_ids}, file_batch_create_params.FileBatchCreateParams + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=VectorStoreFileBatch, + ) + + async def retrieve( + self, + batch_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """ + Retrieves a vector store file batch. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + if not batch_id: + raise ValueError( + f"Expected a non-empty value for `batch_id` but received {batch_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=VectorStoreFileBatch, + ) + + async def cancel( + self, + batch_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Cancel a vector store file batch. + + This attempts to cancel the processing of + files in this batch as soon as possible. 
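+
+        A minimal usage sketch, assuming an async client object named `client`
+        and hypothetical IDs:
+
+            batch = await client.beta.vector_stores.file_batches.cancel(
+                "vsfb_123",
+                vector_store_id="vs_123",
+            )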
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + if not batch_id: + raise ValueError( + f"Expected a non-empty value for `batch_id` but received {batch_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=VectorStoreFileBatch, + ) + + async def create_and_poll( + self, + vector_store_id: str, + *, + file_ids: List[str], + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Create a vector store batch and poll until all files have been processed.""" + batch = await self.create( + vector_store_id=vector_store_id, + file_ids=file_ids, + ) + # TODO: don't poll unless necessary?? + return await self.poll( + batch.id, + vector_store_id=vector_store_id, + poll_interval_ms=poll_interval_ms, + ) + + def list_files( + self, + batch_id: str, + *, + vector_store_id: str, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + filter: Literal["in_progress", "completed", "failed", "cancelled"] + | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[VectorStoreFile, AsyncCursorPage[VectorStoreFile]]: + """ + Returns a list of vector store files in a batch. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include before=obj_foo in order to + fetch the previous page of the list. + + filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + if not batch_id: + raise ValueError( + f"Expected a non-empty value for `batch_id` but received {batch_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/files", + page=AsyncCursorPage[VectorStoreFile], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "filter": filter, + "limit": limit, + "order": order, + }, + file_batch_list_files_params.FileBatchListFilesParams, + ), + ), + model=VectorStoreFile, + ) + + async def poll( + self, + batch_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Wait for the given file batch to be processed. + + Note: this will return even if one of the files failed to process, you need to + check batch.file_counts.failed_count to handle this case. + """ + headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} + if is_given(poll_interval_ms): + headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) + + while True: + response = await self.with_raw_response.retrieve( + batch_id, + vector_store_id=vector_store_id, + extra_headers=headers, + ) + + batch = response.parse() + if batch.file_counts.in_progress > 0: + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 + + await self._sleep(poll_interval_ms / 1000) + continue + + return batch + + async def upload_and_poll( + self, + vector_store_id: str, + *, + files: Iterable[FileTypes], + max_concurrency: int = 5, + file_ids: List[str] = [], + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Uploads the given files concurrently and then creates a vector store file batch. + + If you've already uploaded certain files that you want to include in this batch + then you can pass their IDs through the `file_ids` argument. + + By default, if any file upload fails then an exception will be eagerly raised. + + The number of concurrency uploads is configurable using the `max_concurrency` + parameter. + + Note: this method only supports `asyncio` or `trio` as the backing async + runtime. + """ + uploaded_files: list[FileObject] = [] + + async_library = sniffio.current_async_library() + + if async_library == "asyncio": + + async def asyncio_upload_file( + semaphore: asyncio.Semaphore, file: FileTypes + ) -> None: + async with semaphore: + file_obj = await self._client.files.create( + file=file, + purpose="assistants", + ) + uploaded_files.append(file_obj) + + semaphore = asyncio.Semaphore(max_concurrency) + + tasks = [asyncio_upload_file(semaphore, file) for file in files] + + await asyncio.gather(*tasks) + elif async_library == "trio": + # We only import if the library is being used. 
+ # We support Python 3.7 so are using an older version of trio that does not have type information + import trio # type: ignore # pyright: ignore[reportMissingTypeStubs] + + async def trio_upload_file( + limiter: trio.CapacityLimiter, file: FileTypes + ) -> None: + async with limiter: + file_obj = await self._client.files.create( + file=file, + purpose="assistants", + ) + uploaded_files.append(file_obj) + + limiter = trio.CapacityLimiter(max_concurrency) + + async with trio.open_nursery() as nursery: + for file in files: + nursery.start_soon( + trio_upload_file, limiter, file + ) # pyright: ignore [reportUnknownMemberType] + else: + raise RuntimeError( + f"Async runtime {async_library} is not supported yet. Only asyncio or trio is supported", + ) + + batch = await self.create_and_poll( + vector_store_id=vector_store_id, + file_ids=[*file_ids, *(f.id for f in uploaded_files)], + poll_interval_ms=poll_interval_ms, + ) + return batch + + +class FileBatchesWithRawResponse: + def __init__(self, file_batches: FileBatches) -> None: + self._file_batches = file_batches + + self.create = _legacy_response.to_raw_response_wrapper( + file_batches.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + file_batches.retrieve, + ) + self.cancel = _legacy_response.to_raw_response_wrapper( + file_batches.cancel, + ) + self.list_files = _legacy_response.to_raw_response_wrapper( + file_batches.list_files, + ) + + +class AsyncFileBatchesWithRawResponse: + def __init__(self, file_batches: AsyncFileBatches) -> None: + self._file_batches = file_batches + + self.create = _legacy_response.async_to_raw_response_wrapper( + file_batches.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + file_batches.retrieve, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + file_batches.cancel, + ) + self.list_files = _legacy_response.async_to_raw_response_wrapper( + file_batches.list_files, + ) + + +class FileBatchesWithStreamingResponse: + def __init__(self, file_batches: FileBatches) -> None: + self._file_batches = file_batches + + self.create = to_streamed_response_wrapper( + file_batches.create, + ) + self.retrieve = to_streamed_response_wrapper( + file_batches.retrieve, + ) + self.cancel = to_streamed_response_wrapper( + file_batches.cancel, + ) + self.list_files = to_streamed_response_wrapper( + file_batches.list_files, + ) + + +class AsyncFileBatchesWithStreamingResponse: + def __init__(self, file_batches: AsyncFileBatches) -> None: + self._file_batches = file_batches + + self.create = async_to_streamed_response_wrapper( + file_batches.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + file_batches.retrieve, + ) + self.cancel = async_to_streamed_response_wrapper( + file_batches.cancel, + ) + self.list_files = async_to_streamed_response_wrapper( + file_batches.list_files, + ) diff --git a/portkey_ai/_vendor/openai/resources/beta/vector_stores/files.py b/portkey_ai/_vendor/openai/resources/beta/vector_stores/files.py new file mode 100644 index 00000000..dbdfc67b --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/beta/vector_stores/files.py @@ -0,0 +1,736 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import TYPE_CHECKING +from typing_extensions import Literal, assert_never + +import httpx + +from .... 
import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes +from ...._utils import ( + is_given, + maybe_transform, + async_maybe_transform, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import ( + to_streamed_response_wrapper, + async_to_streamed_response_wrapper, +) +from ....pagination import SyncCursorPage, AsyncCursorPage +from ...._base_client import ( + AsyncPaginator, + make_request_options, +) +from ....types.beta.vector_stores import file_list_params, file_create_params +from ....types.beta.vector_stores.vector_store_file import VectorStoreFile +from ....types.beta.vector_stores.vector_store_file_deleted import ( + VectorStoreFileDeleted, +) + +__all__ = ["Files", "AsyncFiles"] + + +class Files(SyncAPIResource): + @cached_property + def with_raw_response(self) -> FilesWithRawResponse: + return FilesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> FilesWithStreamingResponse: + return FilesWithStreamingResponse(self) + + def create( + self, + vector_store_id: str, + *, + file_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """ + Create a vector store file by attaching a + [File](https://platform.openai.com/docs/api-reference/files) to a + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object). + + Args: + file_id: A [File](https://platform.openai.com/docs/api-reference/files) ID that the + vector store should use. Useful for tools like `file_search` that can access + files. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/vector_stores/{vector_store_id}/files", + body=maybe_transform( + {"file_id": file_id}, file_create_params.FileCreateParams + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=VectorStoreFile, + ) + + def retrieve( + self, + file_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """ + Retrieves a vector store file. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + if not file_id: + raise ValueError( + f"Expected a non-empty value for `file_id` but received {file_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get( + f"/vector_stores/{vector_store_id}/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=VectorStoreFile, + ) + + def list( + self, + vector_store_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + filter: Literal["in_progress", "completed", "failed", "cancelled"] + | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[VectorStoreFile]: + """ + Returns a list of vector store files. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include before=obj_foo in order to + fetch the previous page of the list. + + filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/files", + page=SyncCursorPage[VectorStoreFile], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "filter": filter, + "limit": limit, + "order": order, + }, + file_list_params.FileListParams, + ), + ), + model=VectorStoreFile, + ) + + def delete( + self, + file_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileDeleted: + """Delete a vector store file. + + This will remove the file from the vector store but + the file itself will not be deleted. To delete the file, use the + [delete file](https://platform.openai.com/docs/api-reference/files/delete) + endpoint. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + if not file_id: + raise ValueError( + f"Expected a non-empty value for `file_id` but received {file_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._delete( + f"/vector_stores/{vector_store_id}/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=VectorStoreFileDeleted, + ) + + def create_and_poll( + self, + file_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Attach a file to the given vector store and wait for it to be processed.""" + self.create(vector_store_id=vector_store_id, file_id=file_id) + + return self.poll( + file_id, + vector_store_id=vector_store_id, + poll_interval_ms=poll_interval_ms, + ) + + def poll( + self, + file_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Wait for the vector store file to finish processing. 
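+
+        A minimal usage sketch of the create-and-poll flow this helper backs,
+        assuming a configured client object named `client` and hypothetical IDs:
+
+            vs_file = client.beta.vector_stores.files.create_and_poll(
+                "file_abc",
+                vector_store_id="vs_123",
+            )
+            if vs_file.status == "failed":
+                print(vs_file.last_error)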
+ + Note: this will return even if the file failed to process, you need to check + file.last_error and file.status to handle these cases + """ + headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} + if is_given(poll_interval_ms): + headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) + + while True: + response = self.with_raw_response.retrieve( + file_id, + vector_store_id=vector_store_id, + extra_headers=headers, + ) + + file = response.parse() + if file.status == "in_progress": + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 + + self._sleep(poll_interval_ms / 1000) + elif ( + file.status == "cancelled" + or file.status == "completed" + or file.status == "failed" + ): + return file + else: + if TYPE_CHECKING: # type: ignore[unreachable] + assert_never(file.status) + else: + return file + + def upload( + self, + *, + vector_store_id: str, + file: FileTypes, + ) -> VectorStoreFile: + """Upload a file to the `files` API and then attach it to the given vector store. + + Note the file will be asynchronously processed (you can use the alternative + polling helper method to wait for processing to complete). + """ + file_obj = self._client.files.create(file=file, purpose="assistants") + return self.create(vector_store_id=vector_store_id, file_id=file_obj.id) + + def upload_and_poll( + self, + *, + vector_store_id: str, + file: FileTypes, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Add a file to a vector store and poll until processing is complete.""" + file_obj = self._client.files.create(file=file, purpose="assistants") + return self.create_and_poll( + vector_store_id=vector_store_id, + file_id=file_obj.id, + poll_interval_ms=poll_interval_ms, + ) + + +class AsyncFiles(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncFilesWithRawResponse: + return AsyncFilesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncFilesWithStreamingResponse: + return AsyncFilesWithStreamingResponse(self) + + async def create( + self, + vector_store_id: str, + *, + file_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """ + Create a vector store file by attaching a + [File](https://platform.openai.com/docs/api-reference/files) to a + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object). + + Args: + file_id: A [File](https://platform.openai.com/docs/api-reference/files) ID that the + vector store should use. Useful for tools like `file_search` that can access + files. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/vector_stores/{vector_store_id}/files", + body=await async_maybe_transform( + {"file_id": file_id}, file_create_params.FileCreateParams + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=VectorStoreFile, + ) + + async def retrieve( + self, + file_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """ + Retrieves a vector store file. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + if not file_id: + raise ValueError( + f"Expected a non-empty value for `file_id` but received {file_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/vector_stores/{vector_store_id}/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=VectorStoreFile, + ) + + def list( + self, + vector_store_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + filter: Literal["in_progress", "completed", "failed", "cancelled"] + | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[VectorStoreFile, AsyncCursorPage[VectorStoreFile]]: + """ + Returns a list of vector store files. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. 
For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include before=obj_foo in order to + fetch the previous page of the list. + + filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/files", + page=AsyncCursorPage[VectorStoreFile], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "filter": filter, + "limit": limit, + "order": order, + }, + file_list_params.FileListParams, + ), + ), + model=VectorStoreFile, + ) + + async def delete( + self, + file_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileDeleted: + """Delete a vector store file. + + This will remove the file from the vector store but + the file itself will not be deleted. To delete the file, use the + [delete file](https://platform.openai.com/docs/api-reference/files/delete) + endpoint. 
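+
+        A minimal usage sketch, assuming an async client object named `client`
+        and hypothetical IDs:
+
+            deleted = await client.beta.vector_stores.files.delete(
+                "file_abc",
+                vector_store_id="vs_123",
+            )
+            assert deleted.deleted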
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + if not file_id: + raise ValueError( + f"Expected a non-empty value for `file_id` but received {file_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._delete( + f"/vector_stores/{vector_store_id}/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=VectorStoreFileDeleted, + ) + + async def create_and_poll( + self, + file_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Attach a file to the given vector store and wait for it to be processed.""" + await self.create(vector_store_id=vector_store_id, file_id=file_id) + + return await self.poll( + file_id, + vector_store_id=vector_store_id, + poll_interval_ms=poll_interval_ms, + ) + + async def poll( + self, + file_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Wait for the vector store file to finish processing. + + Note: this will return even if the file failed to process, you need to check + file.last_error and file.status to handle these cases + """ + headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} + if is_given(poll_interval_ms): + headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) + + while True: + response = await self.with_raw_response.retrieve( + file_id, + vector_store_id=vector_store_id, + extra_headers=headers, + ) + + file = response.parse() + if file.status == "in_progress": + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 + + await self._sleep(poll_interval_ms / 1000) + elif ( + file.status == "cancelled" + or file.status == "completed" + or file.status == "failed" + ): + return file + else: + if TYPE_CHECKING: # type: ignore[unreachable] + assert_never(file.status) + else: + return file + + async def upload( + self, + *, + vector_store_id: str, + file: FileTypes, + ) -> VectorStoreFile: + """Upload a file to the `files` API and then attach it to the given vector store. + + Note the file will be asynchronously processed (you can use the alternative + polling helper method to wait for processing to complete). 
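+
+        A minimal usage sketch, assuming an async client object named `client`,
+        a hypothetical vector store ID, and a local file path:
+
+            with open("notes.txt", "rb") as f:
+                vs_file = await client.beta.vector_stores.files.upload(
+                    vector_store_id="vs_123",
+                    file=f,
+                )
+
+        Use `upload_and_poll` instead if you also want to block until the file has
+        been processed.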
+ """ + file_obj = await self._client.files.create(file=file, purpose="assistants") + return await self.create(vector_store_id=vector_store_id, file_id=file_obj.id) + + async def upload_and_poll( + self, + *, + vector_store_id: str, + file: FileTypes, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Add a file to a vector store and poll until processing is complete.""" + file_obj = await self._client.files.create(file=file, purpose="assistants") + return await self.create_and_poll( + vector_store_id=vector_store_id, + file_id=file_obj.id, + poll_interval_ms=poll_interval_ms, + ) + + +class FilesWithRawResponse: + def __init__(self, files: Files) -> None: + self._files = files + + self.create = _legacy_response.to_raw_response_wrapper( + files.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + files.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + files.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + files.delete, + ) + + +class AsyncFilesWithRawResponse: + def __init__(self, files: AsyncFiles) -> None: + self._files = files + + self.create = _legacy_response.async_to_raw_response_wrapper( + files.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + files.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + files.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + files.delete, + ) + + +class FilesWithStreamingResponse: + def __init__(self, files: Files) -> None: + self._files = files + + self.create = to_streamed_response_wrapper( + files.create, + ) + self.retrieve = to_streamed_response_wrapper( + files.retrieve, + ) + self.list = to_streamed_response_wrapper( + files.list, + ) + self.delete = to_streamed_response_wrapper( + files.delete, + ) + + +class AsyncFilesWithStreamingResponse: + def __init__(self, files: AsyncFiles) -> None: + self._files = files + + self.create = async_to_streamed_response_wrapper( + files.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + files.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + files.list, + ) + self.delete = async_to_streamed_response_wrapper( + files.delete, + ) diff --git a/portkey_ai/_vendor/openai/resources/beta/vector_stores/vector_stores.py b/portkey_ai/_vendor/openai/resources/beta/vector_stores/vector_stores.py new file mode 100644 index 00000000..5efa8d6f --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/beta/vector_stores/vector_stores.py @@ -0,0 +1,729 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Optional +from typing_extensions import Literal + +import httpx + +from .... 
import _legacy_response +from .files import ( + Files, + AsyncFiles, + FilesWithRawResponse, + AsyncFilesWithRawResponse, + FilesWithStreamingResponse, + AsyncFilesWithStreamingResponse, +) +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import ( + maybe_transform, + async_maybe_transform, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import ( + to_streamed_response_wrapper, + async_to_streamed_response_wrapper, +) +from .file_batches import ( + FileBatches, + AsyncFileBatches, + FileBatchesWithRawResponse, + AsyncFileBatchesWithRawResponse, + FileBatchesWithStreamingResponse, + AsyncFileBatchesWithStreamingResponse, +) +from ....pagination import SyncCursorPage, AsyncCursorPage +from ....types.beta import ( + vector_store_list_params, + vector_store_create_params, + vector_store_update_params, +) +from ...._base_client import ( + AsyncPaginator, + make_request_options, +) +from ....types.beta.vector_store import VectorStore +from ....types.beta.vector_store_deleted import VectorStoreDeleted + +__all__ = ["VectorStores", "AsyncVectorStores"] + + +class VectorStores(SyncAPIResource): + @cached_property + def files(self) -> Files: + return Files(self._client) + + @cached_property + def file_batches(self) -> FileBatches: + return FileBatches(self._client) + + @cached_property + def with_raw_response(self) -> VectorStoresWithRawResponse: + return VectorStoresWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> VectorStoresWithStreamingResponse: + return VectorStoresWithStreamingResponse(self) + + def create( + self, + *, + expires_after: vector_store_create_params.ExpiresAfter | NotGiven = NOT_GIVEN, + file_ids: List[str] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + name: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Create a vector store. + + Args: + expires_after: The expiration policy for a vector store. + + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + name: The name of the vector store. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + "/vector_stores", + body=maybe_transform( + { + "expires_after": expires_after, + "file_ids": file_ids, + "metadata": metadata, + "name": name, + }, + vector_store_create_params.VectorStoreCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=VectorStore, + ) + + def retrieve( + self, + vector_store_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Retrieves a vector store. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get( + f"/vector_stores/{vector_store_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=VectorStore, + ) + + def update( + self, + vector_store_id: str, + *, + expires_after: Optional[vector_store_update_params.ExpiresAfter] + | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + name: Optional[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Modifies a vector store. + + Args: + expires_after: The expiration policy for a vector store. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + name: The name of the vector store. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/vector_stores/{vector_store_id}", + body=maybe_transform( + { + "expires_after": expires_after, + "metadata": metadata, + "name": name, + }, + vector_store_update_params.VectorStoreUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=VectorStore, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[VectorStore]: + """Returns a list of vector stores. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include before=obj_foo in order to + fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + "/vector_stores", + page=SyncCursorPage[VectorStore], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + vector_store_list_params.VectorStoreListParams, + ), + ), + model=VectorStore, + ) + + def delete( + self, + vector_store_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreDeleted: + """ + Delete a vector store. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._delete( + f"/vector_stores/{vector_store_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=VectorStoreDeleted, + ) + + +class AsyncVectorStores(AsyncAPIResource): + @cached_property + def files(self) -> AsyncFiles: + return AsyncFiles(self._client) + + @cached_property + def file_batches(self) -> AsyncFileBatches: + return AsyncFileBatches(self._client) + + @cached_property + def with_raw_response(self) -> AsyncVectorStoresWithRawResponse: + return AsyncVectorStoresWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncVectorStoresWithStreamingResponse: + return AsyncVectorStoresWithStreamingResponse(self) + + async def create( + self, + *, + expires_after: vector_store_create_params.ExpiresAfter | NotGiven = NOT_GIVEN, + file_ids: List[str] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + name: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Create a vector store. + + Args: + expires_after: The expiration policy for a vector store. + + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + name: The name of the vector store. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + "/vector_stores", + body=await async_maybe_transform( + { + "expires_after": expires_after, + "file_ids": file_ids, + "metadata": metadata, + "name": name, + }, + vector_store_create_params.VectorStoreCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=VectorStore, + ) + + async def retrieve( + self, + vector_store_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Retrieves a vector store. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/vector_stores/{vector_store_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=VectorStore, + ) + + async def update( + self, + vector_store_id: str, + *, + expires_after: Optional[vector_store_update_params.ExpiresAfter] + | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + name: Optional[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Modifies a vector store. + + Args: + expires_after: The expiration policy for a vector store. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + name: The name of the vector store. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/vector_stores/{vector_store_id}", + body=await async_maybe_transform( + { + "expires_after": expires_after, + "metadata": metadata, + "name": name, + }, + vector_store_update_params.VectorStoreUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=VectorStore, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[VectorStore, AsyncCursorPage[VectorStore]]: + """Returns a list of vector stores. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include before=obj_foo in order to + fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + "/vector_stores", + page=AsyncCursorPage[VectorStore], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + vector_store_list_params.VectorStoreListParams, + ), + ), + model=VectorStore, + ) + + async def delete( + self, + vector_store_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreDeleted: + """ + Delete a vector store. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError( + f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}" + ) + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._delete( + f"/vector_stores/{vector_store_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=VectorStoreDeleted, + ) + + +class VectorStoresWithRawResponse: + def __init__(self, vector_stores: VectorStores) -> None: + self._vector_stores = vector_stores + + self.create = _legacy_response.to_raw_response_wrapper( + vector_stores.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + vector_stores.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + vector_stores.update, + ) + self.list = _legacy_response.to_raw_response_wrapper( + vector_stores.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + vector_stores.delete, + ) + + @cached_property + def files(self) -> FilesWithRawResponse: + return FilesWithRawResponse(self._vector_stores.files) + + @cached_property + def file_batches(self) -> FileBatchesWithRawResponse: + return FileBatchesWithRawResponse(self._vector_stores.file_batches) + + +class AsyncVectorStoresWithRawResponse: + def __init__(self, vector_stores: AsyncVectorStores) -> None: + self._vector_stores = vector_stores + + self.create = _legacy_response.async_to_raw_response_wrapper( + vector_stores.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + vector_stores.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + vector_stores.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + vector_stores.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + vector_stores.delete, + ) + + @cached_property + def files(self) -> AsyncFilesWithRawResponse: + return AsyncFilesWithRawResponse(self._vector_stores.files) + + @cached_property + def file_batches(self) -> AsyncFileBatchesWithRawResponse: + return AsyncFileBatchesWithRawResponse(self._vector_stores.file_batches) + + +class VectorStoresWithStreamingResponse: + def __init__(self, vector_stores: VectorStores) -> None: + self._vector_stores = vector_stores + + self.create = to_streamed_response_wrapper( + vector_stores.create, + ) + self.retrieve = to_streamed_response_wrapper( + vector_stores.retrieve, + ) + self.update = to_streamed_response_wrapper( + vector_stores.update, + ) + self.list = to_streamed_response_wrapper( + vector_stores.list, + ) + self.delete = to_streamed_response_wrapper( + vector_stores.delete, + ) + + @cached_property + def files(self) -> FilesWithStreamingResponse: + return FilesWithStreamingResponse(self._vector_stores.files) + + @cached_property + def file_batches(self) -> FileBatchesWithStreamingResponse: + return FileBatchesWithStreamingResponse(self._vector_stores.file_batches) + + +class AsyncVectorStoresWithStreamingResponse: + def 
__init__(self, vector_stores: AsyncVectorStores) -> None: + self._vector_stores = vector_stores + + self.create = async_to_streamed_response_wrapper( + vector_stores.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + vector_stores.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + vector_stores.update, + ) + self.list = async_to_streamed_response_wrapper( + vector_stores.list, + ) + self.delete = async_to_streamed_response_wrapper( + vector_stores.delete, + ) + + @cached_property + def files(self) -> AsyncFilesWithStreamingResponse: + return AsyncFilesWithStreamingResponse(self._vector_stores.files) + + @cached_property + def file_batches(self) -> AsyncFileBatchesWithStreamingResponse: + return AsyncFileBatchesWithStreamingResponse(self._vector_stores.file_batches) diff --git a/portkey_ai/_vendor/openai/resources/chat/__init__.py b/portkey_ai/_vendor/openai/resources/chat/__init__.py new file mode 100644 index 00000000..52dfdcea --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/chat/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .chat import ( + Chat, + AsyncChat, + ChatWithRawResponse, + AsyncChatWithRawResponse, + ChatWithStreamingResponse, + AsyncChatWithStreamingResponse, +) +from .completions import ( + Completions, + AsyncCompletions, + CompletionsWithRawResponse, + AsyncCompletionsWithRawResponse, + CompletionsWithStreamingResponse, + AsyncCompletionsWithStreamingResponse, +) + +__all__ = [ + "Completions", + "AsyncCompletions", + "CompletionsWithRawResponse", + "AsyncCompletionsWithRawResponse", + "CompletionsWithStreamingResponse", + "AsyncCompletionsWithStreamingResponse", + "Chat", + "AsyncChat", + "ChatWithRawResponse", + "AsyncChatWithRawResponse", + "ChatWithStreamingResponse", + "AsyncChatWithStreamingResponse", +] diff --git a/portkey_ai/_vendor/openai/resources/chat/chat.py b/portkey_ai/_vendor/openai/resources/chat/chat.py new file mode 100644 index 00000000..d14d0555 --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/chat/chat.py @@ -0,0 +1,80 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from .completions import ( + Completions, + AsyncCompletions, + CompletionsWithRawResponse, + AsyncCompletionsWithRawResponse, + CompletionsWithStreamingResponse, + AsyncCompletionsWithStreamingResponse, +) + +__all__ = ["Chat", "AsyncChat"] + + +class Chat(SyncAPIResource): + @cached_property + def completions(self) -> Completions: + return Completions(self._client) + + @cached_property + def with_raw_response(self) -> ChatWithRawResponse: + return ChatWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ChatWithStreamingResponse: + return ChatWithStreamingResponse(self) + + +class AsyncChat(AsyncAPIResource): + @cached_property + def completions(self) -> AsyncCompletions: + return AsyncCompletions(self._client) + + @cached_property + def with_raw_response(self) -> AsyncChatWithRawResponse: + return AsyncChatWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncChatWithStreamingResponse: + return AsyncChatWithStreamingResponse(self) + + +class ChatWithRawResponse: + def __init__(self, chat: Chat) -> None: + self._chat = chat + + @cached_property + def completions(self) -> CompletionsWithRawResponse: + return CompletionsWithRawResponse(self._chat.completions) + + +class AsyncChatWithRawResponse: + def __init__(self, chat: AsyncChat) -> None: + self._chat = chat + + @cached_property + def completions(self) -> AsyncCompletionsWithRawResponse: + return AsyncCompletionsWithRawResponse(self._chat.completions) + + +class ChatWithStreamingResponse: + def __init__(self, chat: Chat) -> None: + self._chat = chat + + @cached_property + def completions(self) -> CompletionsWithStreamingResponse: + return CompletionsWithStreamingResponse(self._chat.completions) + + +class AsyncChatWithStreamingResponse: + def __init__(self, chat: AsyncChat) -> None: + self._chat = chat + + @cached_property + def completions(self) -> AsyncCompletionsWithStreamingResponse: + return AsyncCompletionsWithStreamingResponse(self._chat.completions) diff --git a/portkey_ai/_vendor/openai/resources/chat/completions.py b/portkey_ai/_vendor/openai/resources/chat/completions.py new file mode 100644 index 00000000..eb710d7b --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/chat/completions.py @@ -0,0 +1,1272 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, List, Union, Iterable, Optional, overload +from typing_extensions import Literal + +import httpx + +from ... 
import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import ( + required_args, + maybe_transform, + async_maybe_transform, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( + to_streamed_response_wrapper, + async_to_streamed_response_wrapper, +) +from ..._streaming import Stream, AsyncStream +from ...types.chat import completion_create_params +from ..._base_client import ( + make_request_options, +) +from ...types.chat_model import ChatModel +from ...types.chat.chat_completion import ChatCompletion +from ...types.chat.chat_completion_chunk import ChatCompletionChunk +from ...types.chat.chat_completion_tool_param import ChatCompletionToolParam +from ...types.chat.chat_completion_message_param import ChatCompletionMessageParam +from ...types.chat.chat_completion_stream_options_param import ( + ChatCompletionStreamOptionsParam, +) +from ...types.chat.chat_completion_tool_choice_option_param import ( + ChatCompletionToolChoiceOptionParam, +) + +__all__ = ["Completions", "AsyncCompletions"] + + +class Completions(SyncAPIResource): + @cached_property + def with_raw_response(self) -> CompletionsWithRawResponse: + return CompletionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CompletionsWithStreamingResponse: + return CompletionsWithStreamingResponse(self) + + @overload + def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """ + Creates a model response for the given chat conversation. + + Args: + messages: A list of messages comprising the conversation so far. + [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). + + model: ID of the model to use. 
See the + [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) + table for details on which models work with the Chat API. + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. `none` means the model + will not call a function and instead generates a message. `auto` means the model + can pick between generating a message or calling a function. Specifying a + particular function via `{"name": "my_function"}` forces the model to call that + function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. + + The total length of input tokens and generated tokens is limited by the model's + context length. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + response_format: An object specifying the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. 
Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + stop: Up to 4 sequences where the API will stop generating further tokens. + + stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be + sent as data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
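+ # Illustrative usage sketch (not part of the generated module): the non-streaming
+ # overload above can be exercised through a configured client, e.g. assuming the
+ # vendored package is imported as `from portkey_ai._vendor.openai import OpenAI`;
+ # the model name and message content below are placeholders.
+ #
+ #     client = OpenAI()
+ #     completion = client.chat.completions.create(
+ #         model="gpt-3.5-turbo",
+ #         messages=[{"role": "user", "content": "Say hello"}],
+ #     )
+ #     print(completion.choices[0].message.content)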
+ + @overload + def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + stream: Literal[True], + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[ChatCompletionChunk]: + """ + Creates a model response for the given chat conversation. + + Args: + messages: A list of messages comprising the conversation so far. + [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). + + model: ID of the model to use. See the + [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) + table for details on which models work with the Chat API. + + stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be + sent as data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. `none` means the model + will not call a function and instead generates a message. `auto` means the model + can pick between generating a message or calling a function. Specifying a + particular function via `{"name": "my_function"}` forces the model to call that + function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. 
+ + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. + + The total length of input tokens and generated tokens is limited by the model's + context length. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + response_format: An object specifying the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + stop: Up to 4 sequences where the API will stop generating further tokens. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. 
`required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + stream: bool, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | Stream[ChatCompletionChunk]: + """ + Creates a model response for the given chat conversation. + + Args: + messages: A list of messages comprising the conversation so far. + [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). + + model: ID of the model to use. 
See the + [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) + table for details on which models work with the Chat API. + + stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be + sent as data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. `none` means the model + will not call a function and instead generates a message. `auto` means the model + can pick between generating a message or calling a function. Specifying a + particular function via `{"name": "my_function"}` forces the model to call that + function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. + + The total length of input tokens and generated tokens is limited by the model's + context length. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + response_format: An object specifying the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. 
+ + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + stop: Up to 4 sequences where the API will stop generating further tokens. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
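+ # Illustrative usage sketch (placeholders, assuming the same configured `client`
+ # as above): passing `stream=True` selects the `Stream[ChatCompletionChunk]`
+ # overload, and chunks can be iterated as they arrive; `delta.content` may be
+ # `None` on some chunks, hence the `or ""` guard.
+ #
+ #     stream = client.chat.completions.create(
+ #         model="gpt-3.5-turbo",
+ #         messages=[{"role": "user", "content": "Count to three"}],
+ #         stream=True,
+ #     )
+ #     for chunk in stream:
+ #         print(chunk.choices[0].delta.content or "", end="")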
+ + @required_args(["messages", "model"], ["messages", "model", "stream"]) + def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | Stream[ChatCompletionChunk]: + return self._post( + "/chat/completions", + body=maybe_transform( + { + "messages": messages, + "model": model, + "frequency_penalty": frequency_penalty, + "function_call": function_call, + "functions": functions, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_tokens": max_tokens, + "n": n, + "presence_penalty": presence_penalty, + "response_format": response_format, + "seed": seed, + "stop": stop, + "stream": stream, + "stream_options": stream_options, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_logprobs": top_logprobs, + "top_p": top_p, + "user": user, + }, + completion_create_params.CompletionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=ChatCompletion, + stream=stream or False, + stream_cls=Stream[ChatCompletionChunk], + ) + + +class AsyncCompletions(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncCompletionsWithRawResponse: + return AsyncCompletionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: + return AsyncCompletionsWithStreamingResponse(self) + + @overload + async def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_tokens: 
Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """ + Creates a model response for the given chat conversation. + + Args: + messages: A list of messages comprising the conversation so far. + [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). + + model: ID of the model to use. See the + [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) + table for details on which models work with the Chat API. + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. `none` means the model + will not call a function and instead generates a message. `auto` means the model + can pick between generating a message or calling a function. Specifying a + particular function via `{"name": "my_function"}` forces the model to call that + function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. 
+ + The total length of input tokens and generated tokens is limited by the model's + context length. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + response_format: An object specifying the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + stop: Up to 4 sequences where the API will stop generating further tokens. + + stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be + sent as data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. 
Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + stream: Literal[True], + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[ChatCompletionChunk]: + """ + Creates a model response for the given chat conversation. + + Args: + messages: A list of messages comprising the conversation so far. + [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). + + model: ID of the model to use. See the + [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) + table for details on which models work with the Chat API. + + stream: If set, partial message deltas will be sent, like in ChatGPT. 
Tokens will be + sent as data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. `none` means the model + will not call a function and instead generates a message. `auto` means the model + can pick between generating a message or calling a function. Specifying a + particular function via `{"name": "my_function"}` forces the model to call that + function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. + + The total length of input tokens and generated tokens is limited by the model's + context length. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + response_format: An object specifying the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. 
Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + stop: Up to 4 sequences where the API will stop generating further tokens. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
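+ # Illustrative usage sketch (placeholders, assuming an `AsyncOpenAI` client from
+ # the vendored package and an active event loop): awaiting `create` with
+ # `stream=True` returns an `AsyncStream[ChatCompletionChunk]` that is consumed
+ # with `async for`.
+ #
+ #     stream = await client.chat.completions.create(
+ #         model="gpt-3.5-turbo",
+ #         messages=[{"role": "user", "content": "Count to three"}],
+ #         stream=True,
+ #     )
+ #     async for chunk in stream:
+ #         print(chunk.choices[0].delta.content or "", end="")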
+ + @overload + async def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + stream: bool, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: + """ + Creates a model response for the given chat conversation. + + Args: + messages: A list of messages comprising the conversation so far. + [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). + + model: ID of the model to use. See the + [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) + table for details on which models work with the Chat API. + + stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be + sent as data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. `none` means the model + will not call a function and instead generates a message. `auto` means the model + can pick between generating a message or calling a function. Specifying a + particular function via `{"name": "my_function"}` forces the model to call that + function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. 
+ + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. + + The total length of input tokens and generated tokens is limited by the model's + context length. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + response_format: An object specifying the format that the model must output. Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + stop: Up to 4 sequences where the API will stop generating further tokens. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. 
`required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["messages", "model"], ["messages", "model", "stream"]) + async def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] + | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: + return await self._post( + "/chat/completions", + body=await async_maybe_transform( + { + "messages": messages, + "model": model, + "frequency_penalty": frequency_penalty, + "function_call": function_call, + "functions": functions, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_tokens": max_tokens, + "n": n, + "presence_penalty": presence_penalty, + "response_format": response_format, + "seed": seed, + "stop": stop, + "stream": stream, + "stream_options": stream_options, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_logprobs": top_logprobs, + "top_p": top_p, + "user": user, + }, + completion_create_params.CompletionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=ChatCompletion, + stream=stream or False, + stream_cls=AsyncStream[ChatCompletionChunk], + ) + + +class CompletionsWithRawResponse: + def __init__(self, completions: Completions) -> None: + self._completions = completions + + self.create = _legacy_response.to_raw_response_wrapper( + completions.create, + ) + + +class AsyncCompletionsWithRawResponse: + def __init__(self, completions: AsyncCompletions) -> None: + self._completions = completions + + self.create = _legacy_response.async_to_raw_response_wrapper( + completions.create, + ) + + +class CompletionsWithStreamingResponse: + def __init__(self, completions: Completions) -> None: + self._completions = completions + + self.create = to_streamed_response_wrapper( + completions.create, + ) + + +class AsyncCompletionsWithStreamingResponse: + def __init__(self, completions: AsyncCompletions) -> None: + self._completions = completions + + self.create = async_to_streamed_response_wrapper( + completions.create, + ) diff --git a/portkey_ai/_vendor/openai/resources/completions.py b/portkey_ai/_vendor/openai/resources/completions.py new file mode 100644 index 00000000..9d610254 --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/completions.py @@ -0,0 +1,1158 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, List, Union, Iterable, Optional, overload +from typing_extensions import Literal + +import httpx + +from .. 
import _legacy_response +from ..types import completion_create_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import ( + required_args, + maybe_transform, + async_maybe_transform, +) +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from .._streaming import Stream, AsyncStream +from .._base_client import ( + make_request_options, +) +from ..types.completion import Completion +from ..types.chat.chat_completion_stream_options_param import ( + ChatCompletionStreamOptionsParam, +) + +__all__ = ["Completions", "AsyncCompletions"] + + +class Completions(SyncAPIResource): + @cached_property + def with_raw_response(self) -> CompletionsWithRawResponse: + return CompletionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CompletionsWithStreamingResponse: + return CompletionsWithStreamingResponse(self) + + @overload + def create( + self, + *, + model: Union[ + str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"] + ], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] + | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Completion: + """ + Creates a completion for the provided prompt and parameters. + + Args: + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. + + prompt: The prompt(s) to generate completions for, encoded as a string, array of + strings, array of tokens, or array of token arrays. + + Note that <|endoftext|> is the document separator that the model sees during + training, so if a prompt is not specified the model will generate as if from the + beginning of a new document. + + best_of: Generates `best_of` completions server-side and returns the "best" (the one with + the highest log probability per token). Results cannot be streamed. + + When used with `n`, `best_of` controls the number of candidate completions and + `n` specifies how many to return – `best_of` must be greater than `n`. 
+ + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + echo: Echo back the prompt in addition to the completion + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the GPT + tokenizer) to an associated bias value from -100 to 100. You can use this + [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. + Mathematically, the bias is added to the logits generated by the model prior to + sampling. The exact effect will vary per model, but values between -1 and 1 + should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. + + As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token + from being generated. + + logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as + well the chosen tokens. For example, if `logprobs` is 5, the API will return a + list of the 5 most likely tokens. The API will always return the `logprob` of + the sampled token, so there may be up to `logprobs+1` elements in the response. + + The maximum value for `logprobs` is 5. + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the + completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's + context length. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + + n: How many completions to generate for each prompt. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + seed: If specified, our system will make a best effort to sample deterministically, + such that repeated requests with the same `seed` and parameters should return + the same result. + + Determinism is not guaranteed, and you should refer to the `system_fingerprint` + response parameter to monitor changes in the backend. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream: Whether to stream back partial progress. If set, tokens will be sent as + data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). 
+ + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + suffix: The suffix that comes after a completion of inserted text. + + This parameter is only supported for `gpt-3.5-turbo-instruct`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + model: Union[ + str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"] + ], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], + stream: Literal[True], + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] + | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[Completion]: + """ + Creates a completion for the provided prompt and parameters. + + Args: + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. + + prompt: The prompt(s) to generate completions for, encoded as a string, array of + strings, array of tokens, or array of token arrays. + + Note that <|endoftext|> is the document separator that the model sees during + training, so if a prompt is not specified the model will generate as if from the + beginning of a new document. + + stream: Whether to stream back partial progress. 
If set, tokens will be sent as + data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + + best_of: Generates `best_of` completions server-side and returns the "best" (the one with + the highest log probability per token). Results cannot be streamed. + + When used with `n`, `best_of` controls the number of candidate completions and + `n` specifies how many to return – `best_of` must be greater than `n`. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + echo: Echo back the prompt in addition to the completion + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the GPT + tokenizer) to an associated bias value from -100 to 100. You can use this + [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. + Mathematically, the bias is added to the logits generated by the model prior to + sampling. The exact effect will vary per model, but values between -1 and 1 + should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. + + As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token + from being generated. + + logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as + well the chosen tokens. For example, if `logprobs` is 5, the API will return a + list of the 5 most likely tokens. The API will always return the `logprob` of + the sampled token, so there may be up to `logprobs+1` elements in the response. + + The maximum value for `logprobs` is 5. + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the + completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's + context length. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + + n: How many completions to generate for each prompt. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + seed: If specified, our system will make a best effort to sample deterministically, + such that repeated requests with the same `seed` and parameters should return + the same result. 
+ + Determinism is not guaranteed, and you should refer to the `system_fingerprint` + response parameter to monitor changes in the backend. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + suffix: The suffix that comes after a completion of inserted text. + + This parameter is only supported for `gpt-3.5-turbo-instruct`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + model: Union[ + str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"] + ], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], + stream: bool, + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] + | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Completion | Stream[Completion]: + """ + Creates a completion for the provided prompt and parameters. + + Args: + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. + + prompt: The prompt(s) to generate completions for, encoded as a string, array of + strings, array of tokens, or array of token arrays. 
+ + Note that <|endoftext|> is the document separator that the model sees during + training, so if a prompt is not specified the model will generate as if from the + beginning of a new document. + + stream: Whether to stream back partial progress. If set, tokens will be sent as + data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + + best_of: Generates `best_of` completions server-side and returns the "best" (the one with + the highest log probability per token). Results cannot be streamed. + + When used with `n`, `best_of` controls the number of candidate completions and + `n` specifies how many to return – `best_of` must be greater than `n`. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + echo: Echo back the prompt in addition to the completion + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the GPT + tokenizer) to an associated bias value from -100 to 100. You can use this + [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. + Mathematically, the bias is added to the logits generated by the model prior to + sampling. The exact effect will vary per model, but values between -1 and 1 + should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. + + As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token + from being generated. + + logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as + well the chosen tokens. For example, if `logprobs` is 5, the API will return a + list of the 5 most likely tokens. The API will always return the `logprob` of + the sampled token, so there may be up to `logprobs+1` elements in the response. + + The maximum value for `logprobs` is 5. + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the + completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's + context length. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + + n: How many completions to generate for each prompt. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. 
+ + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + seed: If specified, our system will make a best effort to sample deterministically, + such that repeated requests with the same `seed` and parameters should return + the same result. + + Determinism is not guaranteed, and you should refer to the `system_fingerprint` + response parameter to monitor changes in the backend. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + suffix: The suffix that comes after a completion of inserted text. + + This parameter is only supported for `gpt-3.5-turbo-instruct`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["model", "prompt"], ["model", "prompt", "stream"]) + def create( + self, + *, + model: Union[ + str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"] + ], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] + | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Completion | Stream[Completion]: + return self._post( + "/completions", + body=maybe_transform( + { + "model": model, + "prompt": prompt, + "best_of": best_of, + "echo": echo, + "frequency_penalty": frequency_penalty, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_tokens": max_tokens, + "n": n, + "presence_penalty": presence_penalty, + "seed": seed, + "stop": stop, + "stream": stream, + "stream_options": stream_options, + "suffix": suffix, + "temperature": temperature, + "top_p": top_p, + "user": user, + }, + completion_create_params.CompletionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Completion, + stream=stream or False, + stream_cls=Stream[Completion], + ) + + +class AsyncCompletions(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncCompletionsWithRawResponse: + return AsyncCompletionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: + return AsyncCompletionsWithStreamingResponse(self) + + @overload + async def create( + self, + *, + model: Union[ + str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"] + ], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] + | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Completion: + """ + Creates a completion for the provided prompt and parameters. + + Args: + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. + + prompt: The prompt(s) to generate completions for, encoded as a string, array of + strings, array of tokens, or array of token arrays. + + Note that <|endoftext|> is the document separator that the model sees during + training, so if a prompt is not specified the model will generate as if from the + beginning of a new document. 
+ + best_of: Generates `best_of` completions server-side and returns the "best" (the one with + the highest log probability per token). Results cannot be streamed. + + When used with `n`, `best_of` controls the number of candidate completions and + `n` specifies how many to return – `best_of` must be greater than `n`. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + echo: Echo back the prompt in addition to the completion + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the GPT + tokenizer) to an associated bias value from -100 to 100. You can use this + [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. + Mathematically, the bias is added to the logits generated by the model prior to + sampling. The exact effect will vary per model, but values between -1 and 1 + should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. + + As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token + from being generated. + + logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as + well the chosen tokens. For example, if `logprobs` is 5, the API will return a + list of the 5 most likely tokens. The API will always return the `logprob` of + the sampled token, so there may be up to `logprobs+1` elements in the response. + + The maximum value for `logprobs` is 5. + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the + completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's + context length. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + + n: How many completions to generate for each prompt. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + seed: If specified, our system will make a best effort to sample deterministically, + such that repeated requests with the same `seed` and parameters should return + the same result. + + Determinism is not guaranteed, and you should refer to the `system_fingerprint` + response parameter to monitor changes in the backend. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream: Whether to stream back partial progress. 
If set, tokens will be sent as + data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + suffix: The suffix that comes after a completion of inserted text. + + This parameter is only supported for `gpt-3.5-turbo-instruct`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + model: Union[ + str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"] + ], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], + stream: Literal[True], + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] + | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[Completion]: + """ + Creates a completion for the provided prompt and parameters. + + Args: + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. 
+ + prompt: The prompt(s) to generate completions for, encoded as a string, array of + strings, array of tokens, or array of token arrays. + + Note that <|endoftext|> is the document separator that the model sees during + training, so if a prompt is not specified the model will generate as if from the + beginning of a new document. + + stream: Whether to stream back partial progress. If set, tokens will be sent as + data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + + best_of: Generates `best_of` completions server-side and returns the "best" (the one with + the highest log probability per token). Results cannot be streamed. + + When used with `n`, `best_of` controls the number of candidate completions and + `n` specifies how many to return – `best_of` must be greater than `n`. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + echo: Echo back the prompt in addition to the completion + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the GPT + tokenizer) to an associated bias value from -100 to 100. You can use this + [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. + Mathematically, the bias is added to the logits generated by the model prior to + sampling. The exact effect will vary per model, but values between -1 and 1 + should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. + + As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token + from being generated. + + logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as + well the chosen tokens. For example, if `logprobs` is 5, the API will return a + list of the 5 most likely tokens. The API will always return the `logprob` of + the sampled token, so there may be up to `logprobs+1` elements in the response. + + The maximum value for `logprobs` is 5. + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the + completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's + context length. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + + n: How many completions to generate for each prompt. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + presence_penalty: Number between -2.0 and 2.0. 
Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + seed: If specified, our system will make a best effort to sample deterministically, + such that repeated requests with the same `seed` and parameters should return + the same result. + + Determinism is not guaranteed, and you should refer to the `system_fingerprint` + response parameter to monitor changes in the backend. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + suffix: The suffix that comes after a completion of inserted text. + + This parameter is only supported for `gpt-3.5-turbo-instruct`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + model: Union[ + str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"] + ], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], + stream: bool, + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] + | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Completion | AsyncStream[Completion]: + """ + Creates a completion for the provided prompt and parameters. + + Args: + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. + + prompt: The prompt(s) to generate completions for, encoded as a string, array of + strings, array of tokens, or array of token arrays. + + Note that <|endoftext|> is the document separator that the model sees during + training, so if a prompt is not specified the model will generate as if from the + beginning of a new document. + + stream: Whether to stream back partial progress. If set, tokens will be sent as + data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + + best_of: Generates `best_of` completions server-side and returns the "best" (the one with + the highest log probability per token). Results cannot be streamed. + + When used with `n`, `best_of` controls the number of candidate completions and + `n` specifies how many to return – `best_of` must be greater than `n`. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + echo: Echo back the prompt in addition to the completion + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the GPT + tokenizer) to an associated bias value from -100 to 100. You can use this + [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. + Mathematically, the bias is added to the logits generated by the model prior to + sampling. The exact effect will vary per model, but values between -1 and 1 + should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. + + As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token + from being generated. + + logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as + well the chosen tokens. For example, if `logprobs` is 5, the API will return a + list of the 5 most likely tokens. The API will always return the `logprob` of + the sampled token, so there may be up to `logprobs+1` elements in the response. + + The maximum value for `logprobs` is 5. + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the + completion. 
+ + The token count of your prompt plus `max_tokens` cannot exceed the model's + context length. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + + n: How many completions to generate for each prompt. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + + seed: If specified, our system will make a best effort to sample deterministically, + such that repeated requests with the same `seed` and parameters should return + the same result. + + Determinism is not guaranteed, and you should refer to the `system_fingerprint` + response parameter to monitor changes in the backend. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + suffix: The suffix that comes after a completion of inserted text. + + This parameter is only supported for `gpt-3.5-turbo-instruct`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
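A minimal streaming sketch for the legacy completions endpoint documented above, assuming the vendored copy is importable as portkey_ai._vendor.openai; the model, prompt, and API key are illustrative only:

import asyncio
from portkey_ai._vendor.openai import AsyncOpenAI  # assumed import path for the vendored SDK

async def main() -> None:
    client = AsyncOpenAI(api_key="sk-...")  # placeholder key
    stream = await client.completions.create(
        model="gpt-3.5-turbo-instruct",
        prompt="Write a haiku about the sea.",
        max_tokens=64,
        stream=True,  # selects the AsyncStream[Completion] overload
        logit_bias={"50256": -100},  # discourage <|endoftext|>, per the logit_bias docstring
    )
    async for completion in stream:
        print(completion.choices[0].text, end="", flush=True)

asyncio.run(main())

Omitting stream (or passing stream=False) returns a single Completion instead, matching the non-streaming overload.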
+ + @required_args(["model", "prompt"], ["model", "prompt", "stream"]) + async def create( + self, + *, + model: Union[ + str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"] + ], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] + | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Completion | AsyncStream[Completion]: + return await self._post( + "/completions", + body=await async_maybe_transform( + { + "model": model, + "prompt": prompt, + "best_of": best_of, + "echo": echo, + "frequency_penalty": frequency_penalty, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_tokens": max_tokens, + "n": n, + "presence_penalty": presence_penalty, + "seed": seed, + "stop": stop, + "stream": stream, + "stream_options": stream_options, + "suffix": suffix, + "temperature": temperature, + "top_p": top_p, + "user": user, + }, + completion_create_params.CompletionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Completion, + stream=stream or False, + stream_cls=AsyncStream[Completion], + ) + + +class CompletionsWithRawResponse: + def __init__(self, completions: Completions) -> None: + self._completions = completions + + self.create = _legacy_response.to_raw_response_wrapper( + completions.create, + ) + + +class AsyncCompletionsWithRawResponse: + def __init__(self, completions: AsyncCompletions) -> None: + self._completions = completions + + self.create = _legacy_response.async_to_raw_response_wrapper( + completions.create, + ) + + +class CompletionsWithStreamingResponse: + def __init__(self, completions: Completions) -> None: + self._completions = completions + + self.create = to_streamed_response_wrapper( + completions.create, + ) + + +class AsyncCompletionsWithStreamingResponse: + def __init__(self, completions: AsyncCompletions) -> None: + self._completions = completions + + self.create = async_to_streamed_response_wrapper( + completions.create, + ) diff --git a/portkey_ai/_vendor/openai/resources/embeddings.py b/portkey_ai/_vendor/openai/resources/embeddings.py new file mode 100644 index 00000000..6131c798 --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/embeddings.py @@ -0,0 +1,276 @@ +# File generated 
from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import base64 +from typing import List, Union, Iterable, cast +from typing_extensions import Literal + +import httpx + +from .. import _legacy_response +from ..types import embedding_create_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import is_given, maybe_transform +from .._compat import cached_property +from .._extras import numpy as np, has_numpy +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from .._base_client import ( + make_request_options, +) +from ..types.create_embedding_response import CreateEmbeddingResponse + +__all__ = ["Embeddings", "AsyncEmbeddings"] + + +class Embeddings(SyncAPIResource): + @cached_property + def with_raw_response(self) -> EmbeddingsWithRawResponse: + return EmbeddingsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> EmbeddingsWithStreamingResponse: + return EmbeddingsWithStreamingResponse(self) + + def create( + self, + *, + input: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], + model: Union[ + str, + Literal[ + "text-embedding-ada-002", + "text-embedding-3-small", + "text-embedding-3-large", + ], + ], + dimensions: int | NotGiven = NOT_GIVEN, + encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CreateEmbeddingResponse: + """ + Creates an embedding vector representing the input text. + + Args: + input: Input text to embed, encoded as a string or array of tokens. To embed multiple + inputs in a single request, pass an array of strings or array of token arrays. + The input must not exceed the max input tokens for the model (8192 tokens for + `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 + dimensions or less. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. + + dimensions: The number of dimensions the resulting output embeddings should have. Only + supported in `text-embedding-3` and later models. + + encoding_format: The format to return the embeddings in. Can be either `float` or + [`base64`](https://pypi.org/project/pybase64/). + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + params = { + "input": input, + "model": model, + "user": user, + "dimensions": dimensions, + "encoding_format": encoding_format, + } + if not is_given(encoding_format) and has_numpy(): + params["encoding_format"] = "base64" + + def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: + if is_given(encoding_format): + # don't modify the response object if a user explicitly asked for a format + return obj + + for embedding in obj.data: + data = cast(object, embedding.embedding) + if not isinstance(data, str): + # numpy is not installed / base64 optimisation isn't enabled for this model yet + continue + + embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] + base64.b64decode(data), dtype="float32" + ).tolist() + + return obj + + return self._post( + "/embeddings", + body=maybe_transform(params, embedding_create_params.EmbeddingCreateParams), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + post_parser=parser, + ), + cast_to=CreateEmbeddingResponse, + ) + + +class AsyncEmbeddings(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncEmbeddingsWithRawResponse: + return AsyncEmbeddingsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncEmbeddingsWithStreamingResponse: + return AsyncEmbeddingsWithStreamingResponse(self) + + async def create( + self, + *, + input: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], + model: Union[ + str, + Literal[ + "text-embedding-ada-002", + "text-embedding-3-small", + "text-embedding-3-large", + ], + ], + dimensions: int | NotGiven = NOT_GIVEN, + encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CreateEmbeddingResponse: + """ + Creates an embedding vector representing the input text. + + Args: + input: Input text to embed, encoded as a string or array of tokens. To embed multiple + inputs in a single request, pass an array of strings or array of token arrays. + The input must not exceed the max input tokens for the model (8192 tokens for + `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 + dimensions or less. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. + + dimensions: The number of dimensions the resulting output embeddings should have. Only + supported in `text-embedding-3` and later models. + + encoding_format: The format to return the embeddings in. 
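# --- Illustrative usage sketch (editorial note, not part of the vendored diff) ---
# How the embeddings resource defined in this file is typically called; assumes the
# upstream `openai` package (the vendored copy defines the same classes). When
# `encoding_format` is omitted and numpy is available, the `parser` shown in the
# sync implementation requests base64 and transparently decodes it back to floats.
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

resp = client.embeddings.create(
    model="text-embedding-3-small",
    input=["first sentence", "second sentence"],
)
print(len(resp.data), len(resp.data[0].embedding))  # two vectors of floats

# Passing encoding_format explicitly bypasses the optimisation; with "base64"
# the embedding is typically left as a base64 string for the caller to decode.
resp_b64 = client.embeddings.create(
    model="text-embedding-3-small", input="hello", encoding_format="base64"
)
print(type(resp_b64.data[0].embedding))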
Can be either `float` or + [`base64`](https://pypi.org/project/pybase64/). + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + params = { + "input": input, + "model": model, + "user": user, + "dimensions": dimensions, + "encoding_format": encoding_format, + } + if not is_given(encoding_format) and has_numpy(): + params["encoding_format"] = "base64" + + def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: + if is_given(encoding_format): + # don't modify the response object if a user explicitly asked for a format + return obj + + for embedding in obj.data: + data = cast(object, embedding.embedding) + if not isinstance(data, str): + # numpy is not installed / base64 optimisation isn't enabled for this model yet + continue + + embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] + base64.b64decode(data), dtype="float32" + ).tolist() + + return obj + + return await self._post( + "/embeddings", + body=maybe_transform(params, embedding_create_params.EmbeddingCreateParams), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + post_parser=parser, + ), + cast_to=CreateEmbeddingResponse, + ) + + +class EmbeddingsWithRawResponse: + def __init__(self, embeddings: Embeddings) -> None: + self._embeddings = embeddings + + self.create = _legacy_response.to_raw_response_wrapper( + embeddings.create, + ) + + +class AsyncEmbeddingsWithRawResponse: + def __init__(self, embeddings: AsyncEmbeddings) -> None: + self._embeddings = embeddings + + self.create = _legacy_response.async_to_raw_response_wrapper( + embeddings.create, + ) + + +class EmbeddingsWithStreamingResponse: + def __init__(self, embeddings: Embeddings) -> None: + self._embeddings = embeddings + + self.create = to_streamed_response_wrapper( + embeddings.create, + ) + + +class AsyncEmbeddingsWithStreamingResponse: + def __init__(self, embeddings: AsyncEmbeddings) -> None: + self._embeddings = embeddings + + self.create = async_to_streamed_response_wrapper( + embeddings.create, + ) diff --git a/portkey_ai/_vendor/openai/resources/files.py b/portkey_ai/_vendor/openai/resources/files.py new file mode 100644 index 00000000..bda2ff0a --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/files.py @@ -0,0 +1,747 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import time +import typing_extensions +from typing import Mapping, cast +from typing_extensions import Literal + +import httpx + +from .. 
import _legacy_response +from ..types import file_list_params, file_create_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes +from .._utils import ( + extract_files, + maybe_transform, + deepcopy_minimal, + async_maybe_transform, +) +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import ( + StreamedBinaryAPIResponse, + AsyncStreamedBinaryAPIResponse, + to_streamed_response_wrapper, + async_to_streamed_response_wrapper, + to_custom_streamed_response_wrapper, + async_to_custom_streamed_response_wrapper, +) +from ..pagination import SyncPage, AsyncPage +from .._base_client import ( + AsyncPaginator, + make_request_options, +) +from ..types.file_object import FileObject +from ..types.file_deleted import FileDeleted + +__all__ = ["Files", "AsyncFiles"] + + +class Files(SyncAPIResource): + @cached_property + def with_raw_response(self) -> FilesWithRawResponse: + return FilesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> FilesWithStreamingResponse: + return FilesWithStreamingResponse(self) + + def create( + self, + *, + file: FileTypes, + purpose: Literal["fine-tune", "assistants"], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FileObject: + """Upload a file that can be used across various endpoints. + + The size of all the + files uploaded by one organization can be up to 100 GB. + + The size of individual files can be a maximum of 512 MB or 2 million tokens for + Assistants. See the + [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) to + learn more about the types of files supported. The Fine-tuning API only supports + `.jsonl` files. + + Please [contact us](https://help.openai.com/) if you need to increase these + storage limits. + + Args: + file: The File object (not file name) to be uploaded. + + purpose: The intended purpose of the uploaded file. + + Use "fine-tune" for + [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning) and + "assistants" for + [Assistants](https://platform.openai.com/docs/api-reference/assistants) and + [Messages](https://platform.openai.com/docs/api-reference/messages). This allows + us to validate the format of the uploaded file is correct for fine-tuning. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + body = deepcopy_minimal( + { + "file": file, + "purpose": purpose, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) + if files: + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
+ # multipart/form-data; boundary=---abc-- + extra_headers = { + "Content-Type": "multipart/form-data", + **(extra_headers or {}), + } + return self._post( + "/files", + body=maybe_transform(body, file_create_params.FileCreateParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=FileObject, + ) + + def retrieve( + self, + file_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FileObject: + """ + Returns information about a specific file. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not file_id: + raise ValueError( + f"Expected a non-empty value for `file_id` but received {file_id!r}" + ) + return self._get( + f"/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=FileObject, + ) + + def list( + self, + *, + purpose: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncPage[FileObject]: + """ + Returns a list of files that belong to the user's organization. + + Args: + purpose: Only return files with the given purpose. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/files", + page=SyncPage[FileObject], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + {"purpose": purpose}, file_list_params.FileListParams + ), + ), + model=FileObject, + ) + + def delete( + self, + file_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FileDeleted: + """ + Delete a file. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not file_id: + raise ValueError( + f"Expected a non-empty value for `file_id` but received {file_id!r}" + ) + return self._delete( + f"/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=FileDeleted, + ) + + def content( + self, + file_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> _legacy_response.HttpxBinaryResponseContent: + """ + Returns the contents of the specified file. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not file_id: + raise ValueError( + f"Expected a non-empty value for `file_id` but received {file_id!r}" + ) + extra_headers = {"Accept": "application/binary", **(extra_headers or {})} + return self._get( + f"/files/{file_id}/content", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=_legacy_response.HttpxBinaryResponseContent, + ) + + @typing_extensions.deprecated("The `.content()` method should be used instead") + def retrieve_content( + self, + file_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> str: + """ + Returns the contents of the specified file. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not file_id: + raise ValueError( + f"Expected a non-empty value for `file_id` but received {file_id!r}" + ) + return self._get( + f"/files/{file_id}/content", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=str, + ) + + def wait_for_processing( + self, + id: str, + *, + poll_interval: float = 5.0, + max_wait_seconds: float = 30 * 60, + ) -> FileObject: + """Waits for the given file to be processed, default timeout is 30 mins.""" + TERMINAL_STATES = {"processed", "error", "deleted"} + + start = time.time() + file = self.retrieve(id) + while file.status not in TERMINAL_STATES: + self._sleep(poll_interval) + + file = self.retrieve(id) + if time.time() - start > max_wait_seconds: + raise RuntimeError( + f"Giving up on waiting for file {id} to finish processing after {max_wait_seconds} seconds." + ) + + return file + + +class AsyncFiles(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncFilesWithRawResponse: + return AsyncFilesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncFilesWithStreamingResponse: + return AsyncFilesWithStreamingResponse(self) + + async def create( + self, + *, + file: FileTypes, + purpose: Literal["fine-tune", "assistants"], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FileObject: + """Upload a file that can be used across various endpoints. + + The size of all the + files uploaded by one organization can be up to 100 GB. + + The size of individual files can be a maximum of 512 MB or 2 million tokens for + Assistants. See the + [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) to + learn more about the types of files supported. The Fine-tuning API only supports + `.jsonl` files. + + Please [contact us](https://help.openai.com/) if you need to increase these + storage limits. + + Args: + file: The File object (not file name) to be uploaded. + + purpose: The intended purpose of the uploaded file. + + Use "fine-tune" for + [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning) and + "assistants" for + [Assistants](https://platform.openai.com/docs/api-reference/assistants) and + [Messages](https://platform.openai.com/docs/api-reference/messages). This allows + us to validate the format of the uploaded file is correct for fine-tuning. 
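# --- Illustrative usage sketch (editorial note, not part of the vendored diff) ---
# Uploading a JSONL training file with the `create` method defined above.
# Assumes the upstream `openai` package; "training.jsonl" is a placeholder path.
from openai import OpenAI

client = OpenAI()

with open("training.jsonl", "rb") as f:
    uploaded = client.files.create(file=f, purpose="fine-tune")

print(uploaded.id, uploaded.status)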
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + body = deepcopy_minimal( + { + "file": file, + "purpose": purpose, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) + if files: + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = { + "Content-Type": "multipart/form-data", + **(extra_headers or {}), + } + return await self._post( + "/files", + body=await async_maybe_transform(body, file_create_params.FileCreateParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=FileObject, + ) + + async def retrieve( + self, + file_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FileObject: + """ + Returns information about a specific file. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not file_id: + raise ValueError( + f"Expected a non-empty value for `file_id` but received {file_id!r}" + ) + return await self._get( + f"/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=FileObject, + ) + + def list( + self, + *, + purpose: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[FileObject, AsyncPage[FileObject]]: + """ + Returns a list of files that belong to the user's organization. + + Args: + purpose: Only return files with the given purpose. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/files", + page=AsyncPage[FileObject], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + {"purpose": purpose}, file_list_params.FileListParams + ), + ), + model=FileObject, + ) + + async def delete( + self, + file_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FileDeleted: + """ + Delete a file. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not file_id: + raise ValueError( + f"Expected a non-empty value for `file_id` but received {file_id!r}" + ) + return await self._delete( + f"/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=FileDeleted, + ) + + async def content( + self, + file_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> _legacy_response.HttpxBinaryResponseContent: + """ + Returns the contents of the specified file. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not file_id: + raise ValueError( + f"Expected a non-empty value for `file_id` but received {file_id!r}" + ) + extra_headers = {"Accept": "application/binary", **(extra_headers or {})} + return await self._get( + f"/files/{file_id}/content", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=_legacy_response.HttpxBinaryResponseContent, + ) + + @typing_extensions.deprecated("The `.content()` method should be used instead") + async def retrieve_content( + self, + file_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> str: + """ + Returns the contents of the specified file. 
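# --- Illustrative usage sketch (editorial note, not part of the vendored diff) ---
# Polling a file until it reaches a terminal state and then downloading its bytes,
# using the `wait_for_processing` and `content` helpers defined above. Assumes the
# upstream `openai` package; "file-abc123" is a placeholder ID.
from openai import OpenAI

client = OpenAI()

file = client.files.wait_for_processing("file-abc123", poll_interval=2.0)
if file.status == "processed":
    binary = client.files.content(file.id)  # HttpxBinaryResponseContent
    print(len(binary.content))              # raw response body as bytes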
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not file_id: + raise ValueError( + f"Expected a non-empty value for `file_id` but received {file_id!r}" + ) + return await self._get( + f"/files/{file_id}/content", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=str, + ) + + async def wait_for_processing( + self, + id: str, + *, + poll_interval: float = 5.0, + max_wait_seconds: float = 30 * 60, + ) -> FileObject: + """Waits for the given file to be processed, default timeout is 30 mins.""" + TERMINAL_STATES = {"processed", "error", "deleted"} + + start = time.time() + file = await self.retrieve(id) + while file.status not in TERMINAL_STATES: + await self._sleep(poll_interval) + + file = await self.retrieve(id) + if time.time() - start > max_wait_seconds: + raise RuntimeError( + f"Giving up on waiting for file {id} to finish processing after {max_wait_seconds} seconds." + ) + + return file + + +class FilesWithRawResponse: + def __init__(self, files: Files) -> None: + self._files = files + + self.create = _legacy_response.to_raw_response_wrapper( + files.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + files.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + files.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + files.delete, + ) + self.content = _legacy_response.to_raw_response_wrapper( + files.content, + ) + self.retrieve_content = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + files.retrieve_content # pyright: ignore[reportDeprecated], + ) + ) + + +class AsyncFilesWithRawResponse: + def __init__(self, files: AsyncFiles) -> None: + self._files = files + + self.create = _legacy_response.async_to_raw_response_wrapper( + files.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + files.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + files.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + files.delete, + ) + self.content = _legacy_response.async_to_raw_response_wrapper( + files.content, + ) + self.retrieve_content = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + files.retrieve_content # pyright: ignore[reportDeprecated], + ) + ) + + +class FilesWithStreamingResponse: + def __init__(self, files: Files) -> None: + self._files = files + + self.create = to_streamed_response_wrapper( + files.create, + ) + self.retrieve = to_streamed_response_wrapper( + files.retrieve, + ) + self.list = to_streamed_response_wrapper( + files.list, + ) + self.delete = to_streamed_response_wrapper( + files.delete, + ) + self.content = to_custom_streamed_response_wrapper( + files.content, + StreamedBinaryAPIResponse, + ) + self.retrieve_content = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + files.retrieve_content # pyright: ignore[reportDeprecated], + ) + ) + + +class AsyncFilesWithStreamingResponse: + def __init__(self, files: AsyncFiles) -> None: + self._files = files + + self.create = async_to_streamed_response_wrapper( + files.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + files.retrieve, + ) + self.list = 
async_to_streamed_response_wrapper( + files.list, + ) + self.delete = async_to_streamed_response_wrapper( + files.delete, + ) + self.content = async_to_custom_streamed_response_wrapper( + files.content, + AsyncStreamedBinaryAPIResponse, + ) + self.retrieve_content = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + files.retrieve_content # pyright: ignore[reportDeprecated], + ) + ) diff --git a/portkey_ai/_vendor/openai/resources/fine_tuning/__init__.py b/portkey_ai/_vendor/openai/resources/fine_tuning/__init__.py new file mode 100644 index 00000000..7765231f --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/fine_tuning/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .jobs import ( + Jobs, + AsyncJobs, + JobsWithRawResponse, + AsyncJobsWithRawResponse, + JobsWithStreamingResponse, + AsyncJobsWithStreamingResponse, +) +from .fine_tuning import ( + FineTuning, + AsyncFineTuning, + FineTuningWithRawResponse, + AsyncFineTuningWithRawResponse, + FineTuningWithStreamingResponse, + AsyncFineTuningWithStreamingResponse, +) + +__all__ = [ + "Jobs", + "AsyncJobs", + "JobsWithRawResponse", + "AsyncJobsWithRawResponse", + "JobsWithStreamingResponse", + "AsyncJobsWithStreamingResponse", + "FineTuning", + "AsyncFineTuning", + "FineTuningWithRawResponse", + "AsyncFineTuningWithRawResponse", + "FineTuningWithStreamingResponse", + "AsyncFineTuningWithStreamingResponse", +] diff --git a/portkey_ai/_vendor/openai/resources/fine_tuning/fine_tuning.py b/portkey_ai/_vendor/openai/resources/fine_tuning/fine_tuning.py new file mode 100644 index 00000000..0404fed6 --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/fine_tuning/fine_tuning.py @@ -0,0 +1,81 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
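# --- Illustrative usage sketch (editorial note, not part of the vendored diff) ---
# The *WithRawResponse / *WithStreamingResponse classes defined above are reached
# through the `with_raw_response` and `with_streaming_response` properties on each
# resource. Assumes the upstream `openai` package; "file-abc123" is a placeholder ID.
from openai import OpenAI

client = OpenAI()

raw = client.files.with_raw_response.retrieve("file-abc123")
print(raw.headers.get("x-request-id"))  # inspect the raw HTTP response
file_obj = raw.parse()                  # then parse into the FileObject model

with client.files.with_streaming_response.content("file-abc123") as response:
    response.stream_to_file("downloaded.jsonl")  # stream the body to disk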
+ +from __future__ import annotations + +from .jobs import ( + Jobs, + AsyncJobs, + JobsWithRawResponse, + AsyncJobsWithRawResponse, + JobsWithStreamingResponse, + AsyncJobsWithStreamingResponse, +) +from ..._compat import cached_property +from .jobs.jobs import Jobs, AsyncJobs +from ..._resource import SyncAPIResource, AsyncAPIResource + +__all__ = ["FineTuning", "AsyncFineTuning"] + + +class FineTuning(SyncAPIResource): + @cached_property + def jobs(self) -> Jobs: + return Jobs(self._client) + + @cached_property + def with_raw_response(self) -> FineTuningWithRawResponse: + return FineTuningWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> FineTuningWithStreamingResponse: + return FineTuningWithStreamingResponse(self) + + +class AsyncFineTuning(AsyncAPIResource): + @cached_property + def jobs(self) -> AsyncJobs: + return AsyncJobs(self._client) + + @cached_property + def with_raw_response(self) -> AsyncFineTuningWithRawResponse: + return AsyncFineTuningWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncFineTuningWithStreamingResponse: + return AsyncFineTuningWithStreamingResponse(self) + + +class FineTuningWithRawResponse: + def __init__(self, fine_tuning: FineTuning) -> None: + self._fine_tuning = fine_tuning + + @cached_property + def jobs(self) -> JobsWithRawResponse: + return JobsWithRawResponse(self._fine_tuning.jobs) + + +class AsyncFineTuningWithRawResponse: + def __init__(self, fine_tuning: AsyncFineTuning) -> None: + self._fine_tuning = fine_tuning + + @cached_property + def jobs(self) -> AsyncJobsWithRawResponse: + return AsyncJobsWithRawResponse(self._fine_tuning.jobs) + + +class FineTuningWithStreamingResponse: + def __init__(self, fine_tuning: FineTuning) -> None: + self._fine_tuning = fine_tuning + + @cached_property + def jobs(self) -> JobsWithStreamingResponse: + return JobsWithStreamingResponse(self._fine_tuning.jobs) + + +class AsyncFineTuningWithStreamingResponse: + def __init__(self, fine_tuning: AsyncFineTuning) -> None: + self._fine_tuning = fine_tuning + + @cached_property + def jobs(self) -> AsyncJobsWithStreamingResponse: + return AsyncJobsWithStreamingResponse(self._fine_tuning.jobs) diff --git a/portkey_ai/_vendor/openai/resources/fine_tuning/jobs/__init__.py b/portkey_ai/_vendor/openai/resources/fine_tuning/jobs/__init__.py new file mode 100644 index 00000000..94cd1fb7 --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/fine_tuning/jobs/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .jobs import ( + Jobs, + AsyncJobs, + JobsWithRawResponse, + AsyncJobsWithRawResponse, + JobsWithStreamingResponse, + AsyncJobsWithStreamingResponse, +) +from .checkpoints import ( + Checkpoints, + AsyncCheckpoints, + CheckpointsWithRawResponse, + AsyncCheckpointsWithRawResponse, + CheckpointsWithStreamingResponse, + AsyncCheckpointsWithStreamingResponse, +) + +__all__ = [ + "Checkpoints", + "AsyncCheckpoints", + "CheckpointsWithRawResponse", + "AsyncCheckpointsWithRawResponse", + "CheckpointsWithStreamingResponse", + "AsyncCheckpointsWithStreamingResponse", + "Jobs", + "AsyncJobs", + "JobsWithRawResponse", + "AsyncJobsWithRawResponse", + "JobsWithStreamingResponse", + "AsyncJobsWithStreamingResponse", +] diff --git a/portkey_ai/_vendor/openai/resources/fine_tuning/jobs/checkpoints.py b/portkey_ai/_vendor/openai/resources/fine_tuning/jobs/checkpoints.py new file mode 100644 index 00000000..3d8519d0 --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/fine_tuning/jobs/checkpoints.py @@ -0,0 +1,188 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import httpx + +from .... import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import ( + to_streamed_response_wrapper, + async_to_streamed_response_wrapper, +) +from ....pagination import SyncCursorPage, AsyncCursorPage +from ...._base_client import ( + AsyncPaginator, + make_request_options, +) +from ....types.fine_tuning.jobs import checkpoint_list_params +from ....types.fine_tuning.jobs.fine_tuning_job_checkpoint import ( + FineTuningJobCheckpoint, +) + +__all__ = ["Checkpoints", "AsyncCheckpoints"] + + +class Checkpoints(SyncAPIResource): + @cached_property + def with_raw_response(self) -> CheckpointsWithRawResponse: + return CheckpointsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CheckpointsWithStreamingResponse: + return CheckpointsWithStreamingResponse(self) + + def list( + self, + fine_tuning_job_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[FineTuningJobCheckpoint]: + """ + List checkpoints for a fine-tuning job. + + Args: + after: Identifier for the last checkpoint ID from the previous pagination request. + + limit: Number of checkpoints to retrieve. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError( + f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}" + ) + return self._get_api_list( + f"/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints", + page=SyncCursorPage[FineTuningJobCheckpoint], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + checkpoint_list_params.CheckpointListParams, + ), + ), + model=FineTuningJobCheckpoint, + ) + + +class AsyncCheckpoints(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncCheckpointsWithRawResponse: + return AsyncCheckpointsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCheckpointsWithStreamingResponse: + return AsyncCheckpointsWithStreamingResponse(self) + + def list( + self, + fine_tuning_job_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[ + FineTuningJobCheckpoint, AsyncCursorPage[FineTuningJobCheckpoint] + ]: + """ + List checkpoints for a fine-tuning job. + + Args: + after: Identifier for the last checkpoint ID from the previous pagination request. + + limit: Number of checkpoints to retrieve. 
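# --- Illustrative usage sketch (editorial note, not part of the vendored diff) ---
# Listing checkpoints for a fine-tuning job with the paginated resource above;
# SyncCursorPage is iterable and fetches further pages on demand. Assumes the
# upstream `openai` package; "ftjob-abc123" is a placeholder job ID.
from openai import OpenAI

client = OpenAI()

for checkpoint in client.fine_tuning.jobs.checkpoints.list("ftjob-abc123", limit=10):
    print(checkpoint.step_number, checkpoint.fine_tuned_model_checkpoint)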
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError( + f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}" + ) + return self._get_api_list( + f"/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints", + page=AsyncCursorPage[FineTuningJobCheckpoint], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + checkpoint_list_params.CheckpointListParams, + ), + ), + model=FineTuningJobCheckpoint, + ) + + +class CheckpointsWithRawResponse: + def __init__(self, checkpoints: Checkpoints) -> None: + self._checkpoints = checkpoints + + self.list = _legacy_response.to_raw_response_wrapper( + checkpoints.list, + ) + + +class AsyncCheckpointsWithRawResponse: + def __init__(self, checkpoints: AsyncCheckpoints) -> None: + self._checkpoints = checkpoints + + self.list = _legacy_response.async_to_raw_response_wrapper( + checkpoints.list, + ) + + +class CheckpointsWithStreamingResponse: + def __init__(self, checkpoints: Checkpoints) -> None: + self._checkpoints = checkpoints + + self.list = to_streamed_response_wrapper( + checkpoints.list, + ) + + +class AsyncCheckpointsWithStreamingResponse: + def __init__(self, checkpoints: AsyncCheckpoints) -> None: + self._checkpoints = checkpoints + + self.list = async_to_streamed_response_wrapper( + checkpoints.list, + ) diff --git a/portkey_ai/_vendor/openai/resources/fine_tuning/jobs/jobs.py b/portkey_ai/_vendor/openai/resources/fine_tuning/jobs/jobs.py new file mode 100644 index 00000000..56897077 --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/fine_tuning/jobs/jobs.py @@ -0,0 +1,725 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable, Optional +from typing_extensions import Literal + +import httpx + +from .... 
import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import ( + maybe_transform, + async_maybe_transform, +) +from ...._compat import cached_property +from .checkpoints import ( + Checkpoints, + AsyncCheckpoints, + CheckpointsWithRawResponse, + AsyncCheckpointsWithRawResponse, + CheckpointsWithStreamingResponse, + AsyncCheckpointsWithStreamingResponse, +) +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import ( + to_streamed_response_wrapper, + async_to_streamed_response_wrapper, +) +from ....pagination import SyncCursorPage, AsyncCursorPage +from ...._base_client import ( + AsyncPaginator, + make_request_options, +) +from ....types.fine_tuning import ( + job_list_params, + job_create_params, + job_list_events_params, +) +from ....types.fine_tuning.fine_tuning_job import FineTuningJob +from ....types.fine_tuning.fine_tuning_job_event import FineTuningJobEvent + +__all__ = ["Jobs", "AsyncJobs"] + + +class Jobs(SyncAPIResource): + @cached_property + def checkpoints(self) -> Checkpoints: + return Checkpoints(self._client) + + @cached_property + def with_raw_response(self) -> JobsWithRawResponse: + return JobsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> JobsWithStreamingResponse: + return JobsWithStreamingResponse(self) + + def create( + self, + *, + model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]], + training_file: str, + hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN, + integrations: Optional[Iterable[job_create_params.Integration]] + | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + validation_file: Optional[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTuningJob: + """ + Creates a fine-tuning job which begins the process of creating a new model from + a given dataset. + + Response includes details of the enqueued job including job status and the name + of the fine-tuned models once complete. + + [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning) + + Args: + model: The name of the model to fine-tune. You can select one of the + [supported models](https://platform.openai.com/docs/guides/fine-tuning/what-models-can-be-fine-tuned). + + training_file: The ID of an uploaded file that contains training data. + + See [upload file](https://platform.openai.com/docs/api-reference/files/create) + for how to upload a file. + + Your dataset must be formatted as a JSONL file. Additionally, you must upload + your file with the purpose `fine-tune`. + + See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) + for more details. + + hyperparameters: The hyperparameters used for the fine-tuning job. + + integrations: A list of integrations to enable for your fine-tuning job. + + seed: The seed controls the reproducibility of the job. Passing in the same seed and + job parameters should produce the same results, but may differ in rare cases. If + a seed is not specified, one will be generated for you. 
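# --- Illustrative usage sketch (editorial note, not part of the vendored diff) ---
# Creating a fine-tuning job with the resource above. Assumes the upstream
# `openai` package; "file-abc123" and "file-def456" are placeholder file IDs.
from openai import OpenAI

client = OpenAI()

job = client.fine_tuning.jobs.create(
    model="gpt-3.5-turbo",
    training_file="file-abc123",
    validation_file="file-def456",
    hyperparameters={"n_epochs": 3},
    suffix="custom-model-name",
    seed=42,
)
print(job.id, job.status)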
+ + suffix: A string of up to 18 characters that will be added to your fine-tuned model + name. + + For example, a `suffix` of "custom-model-name" would produce a model name like + `ft:gpt-3.5-turbo:openai:custom-model-name:7p4lURel`. + + validation_file: The ID of an uploaded file that contains validation data. + + If you provide this file, the data is used to generate validation metrics + periodically during fine-tuning. These metrics can be viewed in the fine-tuning + results file. The same data should not be present in both train and validation + files. + + Your dataset must be formatted as a JSONL file. You must upload your file with + the purpose `fine-tune`. + + See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) + for more details. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/fine_tuning/jobs", + body=maybe_transform( + { + "model": model, + "training_file": training_file, + "hyperparameters": hyperparameters, + "integrations": integrations, + "seed": seed, + "suffix": suffix, + "validation_file": validation_file, + }, + job_create_params.JobCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=FineTuningJob, + ) + + def retrieve( + self, + fine_tuning_job_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTuningJob: + """ + Get info about a fine-tuning job. + + [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning) + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError( + f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}" + ) + return self._get( + f"/fine_tuning/jobs/{fine_tuning_job_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=FineTuningJob, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[FineTuningJob]: + """ + List your organization's fine-tuning jobs + + Args: + after: Identifier for the last job from the previous pagination request. + + limit: Number of fine-tuning jobs to retrieve. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/fine_tuning/jobs", + page=SyncCursorPage[FineTuningJob], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + job_list_params.JobListParams, + ), + ), + model=FineTuningJob, + ) + + def cancel( + self, + fine_tuning_job_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTuningJob: + """ + Immediately cancel a fine-tune job. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError( + f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}" + ) + return self._post( + f"/fine_tuning/jobs/{fine_tuning_job_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=FineTuningJob, + ) + + def list_events( + self, + fine_tuning_job_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[FineTuningJobEvent]: + """ + Get status updates for a fine-tuning job. + + Args: + after: Identifier for the last event from the previous pagination request. + + limit: Number of events to retrieve. 
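# --- Illustrative usage sketch (editorial note, not part of the vendored diff) ---
# Following a job's status updates with the paginated `list_events` helper above.
# Assumes the upstream `openai` package; "ftjob-abc123" is a placeholder job ID.
from openai import OpenAI

client = OpenAI()

for event in client.fine_tuning.jobs.list_events("ftjob-abc123", limit=20):
    print(event.created_at, event.level, event.message)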
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError( + f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}" + ) + return self._get_api_list( + f"/fine_tuning/jobs/{fine_tuning_job_id}/events", + page=SyncCursorPage[FineTuningJobEvent], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + job_list_events_params.JobListEventsParams, + ), + ), + model=FineTuningJobEvent, + ) + + +class AsyncJobs(AsyncAPIResource): + @cached_property + def checkpoints(self) -> AsyncCheckpoints: + return AsyncCheckpoints(self._client) + + @cached_property + def with_raw_response(self) -> AsyncJobsWithRawResponse: + return AsyncJobsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncJobsWithStreamingResponse: + return AsyncJobsWithStreamingResponse(self) + + async def create( + self, + *, + model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]], + training_file: str, + hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN, + integrations: Optional[Iterable[job_create_params.Integration]] + | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + validation_file: Optional[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTuningJob: + """ + Creates a fine-tuning job which begins the process of creating a new model from + a given dataset. + + Response includes details of the enqueued job including job status and the name + of the fine-tuned models once complete. + + [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning) + + Args: + model: The name of the model to fine-tune. You can select one of the + [supported models](https://platform.openai.com/docs/guides/fine-tuning/what-models-can-be-fine-tuned). + + training_file: The ID of an uploaded file that contains training data. + + See [upload file](https://platform.openai.com/docs/api-reference/files/create) + for how to upload a file. + + Your dataset must be formatted as a JSONL file. Additionally, you must upload + your file with the purpose `fine-tune`. + + See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) + for more details. + + hyperparameters: The hyperparameters used for the fine-tuning job. + + integrations: A list of integrations to enable for your fine-tuning job. + + seed: The seed controls the reproducibility of the job. Passing in the same seed and + job parameters should produce the same results, but may differ in rare cases. If + a seed is not specified, one will be generated for you. + + suffix: A string of up to 18 characters that will be added to your fine-tuned model + name. 
+ + For example, a `suffix` of "custom-model-name" would produce a model name like + `ft:gpt-3.5-turbo:openai:custom-model-name:7p4lURel`. + + validation_file: The ID of an uploaded file that contains validation data. + + If you provide this file, the data is used to generate validation metrics + periodically during fine-tuning. These metrics can be viewed in the fine-tuning + results file. The same data should not be present in both train and validation + files. + + Your dataset must be formatted as a JSONL file. You must upload your file with + the purpose `fine-tune`. + + See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) + for more details. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/fine_tuning/jobs", + body=await async_maybe_transform( + { + "model": model, + "training_file": training_file, + "hyperparameters": hyperparameters, + "integrations": integrations, + "seed": seed, + "suffix": suffix, + "validation_file": validation_file, + }, + job_create_params.JobCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=FineTuningJob, + ) + + async def retrieve( + self, + fine_tuning_job_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTuningJob: + """ + Get info about a fine-tuning job. + + [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning) + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError( + f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}" + ) + return await self._get( + f"/fine_tuning/jobs/{fine_tuning_job_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=FineTuningJob, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[FineTuningJob, AsyncCursorPage[FineTuningJob]]: + """ + List your organization's fine-tuning jobs + + Args: + after: Identifier for the last job from the previous pagination request. + + limit: Number of fine-tuning jobs to retrieve. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/fine_tuning/jobs", + page=AsyncCursorPage[FineTuningJob], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + job_list_params.JobListParams, + ), + ), + model=FineTuningJob, + ) + + async def cancel( + self, + fine_tuning_job_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTuningJob: + """ + Immediately cancel a fine-tune job. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError( + f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}" + ) + return await self._post( + f"/fine_tuning/jobs/{fine_tuning_job_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=FineTuningJob, + ) + + def list_events( + self, + fine_tuning_job_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[FineTuningJobEvent, AsyncCursorPage[FineTuningJobEvent]]: + """ + Get status updates for a fine-tuning job. + + Args: + after: Identifier for the last event from the previous pagination request. + + limit: Number of events to retrieve. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError( + f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}" + ) + return self._get_api_list( + f"/fine_tuning/jobs/{fine_tuning_job_id}/events", + page=AsyncCursorPage[FineTuningJobEvent], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + job_list_events_params.JobListEventsParams, + ), + ), + model=FineTuningJobEvent, + ) + + +class JobsWithRawResponse: + def __init__(self, jobs: Jobs) -> None: + self._jobs = jobs + + self.create = _legacy_response.to_raw_response_wrapper( + jobs.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + jobs.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + jobs.list, + ) + self.cancel = _legacy_response.to_raw_response_wrapper( + jobs.cancel, + ) + self.list_events = _legacy_response.to_raw_response_wrapper( + jobs.list_events, + ) + + @cached_property + def checkpoints(self) -> CheckpointsWithRawResponse: + return CheckpointsWithRawResponse(self._jobs.checkpoints) + + +class AsyncJobsWithRawResponse: + def __init__(self, jobs: AsyncJobs) -> None: + self._jobs = jobs + + self.create = _legacy_response.async_to_raw_response_wrapper( + jobs.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + jobs.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + jobs.list, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + jobs.cancel, + ) + self.list_events = _legacy_response.async_to_raw_response_wrapper( + jobs.list_events, + ) + + @cached_property + def checkpoints(self) -> AsyncCheckpointsWithRawResponse: + return AsyncCheckpointsWithRawResponse(self._jobs.checkpoints) + + +class JobsWithStreamingResponse: + def __init__(self, jobs: Jobs) -> None: + self._jobs = jobs + + self.create = to_streamed_response_wrapper( + jobs.create, + ) + self.retrieve = to_streamed_response_wrapper( + jobs.retrieve, + ) + self.list = to_streamed_response_wrapper( + jobs.list, + ) + self.cancel = to_streamed_response_wrapper( + jobs.cancel, + ) + self.list_events = to_streamed_response_wrapper( + jobs.list_events, + ) + + @cached_property + def checkpoints(self) -> CheckpointsWithStreamingResponse: + return CheckpointsWithStreamingResponse(self._jobs.checkpoints) + + +class AsyncJobsWithStreamingResponse: + def __init__(self, jobs: AsyncJobs) -> None: + self._jobs = jobs + + self.create = async_to_streamed_response_wrapper( + jobs.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + jobs.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + jobs.list, + ) + self.cancel = async_to_streamed_response_wrapper( + jobs.cancel, + ) + self.list_events = async_to_streamed_response_wrapper( + jobs.list_events, + ) + + @cached_property + def checkpoints(self) -> AsyncCheckpointsWithStreamingResponse: + return AsyncCheckpointsWithStreamingResponse(self._jobs.checkpoints) diff --git a/portkey_ai/_vendor/openai/resources/images.py b/portkey_ai/_vendor/openai/resources/images.py new file mode 100644 index 00000000..5629a5d0 --- /dev/null +++ 
b/portkey_ai/_vendor/openai/resources/images.py @@ -0,0 +1,635 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Mapping, Optional, cast +from typing_extensions import Literal + +import httpx + +from .. import _legacy_response +from ..types import ( + image_edit_params, + image_generate_params, + image_create_variation_params, +) +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes +from .._utils import ( + extract_files, + maybe_transform, + deepcopy_minimal, + async_maybe_transform, +) +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from .._base_client import ( + make_request_options, +) +from ..types.images_response import ImagesResponse + +__all__ = ["Images", "AsyncImages"] + + +class Images(SyncAPIResource): + @cached_property + def with_raw_response(self) -> ImagesWithRawResponse: + return ImagesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ImagesWithStreamingResponse: + return ImagesWithStreamingResponse(self) + + def create_variation( + self, + *, + image: FileTypes, + model: Union[str, Literal["dall-e-2"], None] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, + size: Optional[Literal["256x256", "512x512", "1024x1024"]] + | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ImagesResponse: + """ + Creates a variation of a given image. + + Args: + image: The image to use as the basis for the variation(s). Must be a valid PNG file, + less than 4MB, and square. + + model: The model to use for image generation. Only `dall-e-2` is supported at this + time. + + n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only + `n=1` is supported. + + response_format: The format in which the generated images are returned. Must be one of `url` or + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. + + size: The size of the generated images. Must be one of `256x256`, `512x512`, or + `1024x1024`. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + body = deepcopy_minimal( + { + "image": image, + "model": model, + "n": n, + "response_format": response_format, + "size": size, + "user": user, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["image"]]) + if files: + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
+ # multipart/form-data; boundary=---abc-- + extra_headers = { + "Content-Type": "multipart/form-data", + **(extra_headers or {}), + } + return self._post( + "/images/variations", + body=maybe_transform( + body, image_create_variation_params.ImageCreateVariationParams + ), + files=files, + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=ImagesResponse, + ) + + def edit( + self, + *, + image: FileTypes, + prompt: str, + mask: FileTypes | NotGiven = NOT_GIVEN, + model: Union[str, Literal["dall-e-2"], None] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, + size: Optional[Literal["256x256", "512x512", "1024x1024"]] + | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ImagesResponse: + """ + Creates an edited or extended image given an original image and a prompt. + + Args: + image: The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask + is not provided, image must have transparency, which will be used as the mask. + + prompt: A text description of the desired image(s). The maximum length is 1000 + characters. + + mask: An additional image whose fully transparent areas (e.g. where alpha is zero) + indicate where `image` should be edited. Must be a valid PNG file, less than + 4MB, and have the same dimensions as `image`. + + model: The model to use for image generation. Only `dall-e-2` is supported at this + time. + + n: The number of images to generate. Must be between 1 and 10. + + response_format: The format in which the generated images are returned. Must be one of `url` or + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. + + size: The size of the generated images. Must be one of `256x256`, `512x512`, or + `1024x1024`. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + body = deepcopy_minimal( + { + "image": image, + "prompt": prompt, + "mask": mask, + "model": model, + "n": n, + "response_format": response_format, + "size": size, + "user": user, + } + ) + files = extract_files( + cast(Mapping[str, object], body), paths=[["image"], ["mask"]] + ) + if files: + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
+ # multipart/form-data; boundary=---abc-- + extra_headers = { + "Content-Type": "multipart/form-data", + **(extra_headers or {}), + } + return self._post( + "/images/edits", + body=maybe_transform(body, image_edit_params.ImageEditParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=ImagesResponse, + ) + + def generate( + self, + *, + prompt: str, + model: Union[str, Literal["dall-e-2", "dall-e-3"], None] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + quality: Literal["standard", "hd"] | NotGiven = NOT_GIVEN, + response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, + size: Optional[ + Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"] + ] + | NotGiven = NOT_GIVEN, + style: Optional[Literal["vivid", "natural"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ImagesResponse: + """ + Creates an image given a prompt. + + Args: + prompt: A text description of the desired image(s). The maximum length is 1000 + characters for `dall-e-2` and 4000 characters for `dall-e-3`. + + model: The model to use for image generation. + + n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only + `n=1` is supported. + + quality: The quality of the image that will be generated. `hd` creates images with finer + details and greater consistency across the image. This param is only supported + for `dall-e-3`. + + response_format: The format in which the generated images are returned. Must be one of `url` or + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. + + size: The size of the generated images. Must be one of `256x256`, `512x512`, or + `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or + `1024x1792` for `dall-e-3` models. + + style: The style of the generated images. Must be one of `vivid` or `natural`. Vivid + causes the model to lean towards generating hyper-real and dramatic images. + Natural causes the model to produce more natural, less hyper-real looking + images. This param is only supported for `dall-e-3`. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/images/generations", + body=maybe_transform( + { + "prompt": prompt, + "model": model, + "n": n, + "quality": quality, + "response_format": response_format, + "size": size, + "style": style, + "user": user, + }, + image_generate_params.ImageGenerateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=ImagesResponse, + ) + + +class AsyncImages(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncImagesWithRawResponse: + return AsyncImagesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncImagesWithStreamingResponse: + return AsyncImagesWithStreamingResponse(self) + + async def create_variation( + self, + *, + image: FileTypes, + model: Union[str, Literal["dall-e-2"], None] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, + size: Optional[Literal["256x256", "512x512", "1024x1024"]] + | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ImagesResponse: + """ + Creates a variation of a given image. + + Args: + image: The image to use as the basis for the variation(s). Must be a valid PNG file, + less than 4MB, and square. + + model: The model to use for image generation. Only `dall-e-2` is supported at this + time. + + n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only + `n=1` is supported. + + response_format: The format in which the generated images are returned. Must be one of `url` or + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. + + size: The size of the generated images. Must be one of `256x256`, `512x512`, or + `1024x1024`. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + body = deepcopy_minimal( + { + "image": image, + "model": model, + "n": n, + "response_format": response_format, + "size": size, + "user": user, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["image"]]) + if files: + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
+ # multipart/form-data; boundary=---abc-- + extra_headers = { + "Content-Type": "multipart/form-data", + **(extra_headers or {}), + } + return await self._post( + "/images/variations", + body=await async_maybe_transform( + body, image_create_variation_params.ImageCreateVariationParams + ), + files=files, + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=ImagesResponse, + ) + + async def edit( + self, + *, + image: FileTypes, + prompt: str, + mask: FileTypes | NotGiven = NOT_GIVEN, + model: Union[str, Literal["dall-e-2"], None] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, + size: Optional[Literal["256x256", "512x512", "1024x1024"]] + | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ImagesResponse: + """ + Creates an edited or extended image given an original image and a prompt. + + Args: + image: The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask + is not provided, image must have transparency, which will be used as the mask. + + prompt: A text description of the desired image(s). The maximum length is 1000 + characters. + + mask: An additional image whose fully transparent areas (e.g. where alpha is zero) + indicate where `image` should be edited. Must be a valid PNG file, less than + 4MB, and have the same dimensions as `image`. + + model: The model to use for image generation. Only `dall-e-2` is supported at this + time. + + n: The number of images to generate. Must be between 1 and 10. + + response_format: The format in which the generated images are returned. Must be one of `url` or + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. + + size: The size of the generated images. Must be one of `256x256`, `512x512`, or + `1024x1024`. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + body = deepcopy_minimal( + { + "image": image, + "prompt": prompt, + "mask": mask, + "model": model, + "n": n, + "response_format": response_format, + "size": size, + "user": user, + } + ) + files = extract_files( + cast(Mapping[str, object], body), paths=[["image"], ["mask"]] + ) + if files: + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
+ # multipart/form-data; boundary=---abc-- + extra_headers = { + "Content-Type": "multipart/form-data", + **(extra_headers or {}), + } + return await self._post( + "/images/edits", + body=await async_maybe_transform(body, image_edit_params.ImageEditParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=ImagesResponse, + ) + + async def generate( + self, + *, + prompt: str, + model: Union[str, Literal["dall-e-2", "dall-e-3"], None] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + quality: Literal["standard", "hd"] | NotGiven = NOT_GIVEN, + response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, + size: Optional[ + Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"] + ] + | NotGiven = NOT_GIVEN, + style: Optional[Literal["vivid", "natural"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ImagesResponse: + """ + Creates an image given a prompt. + + Args: + prompt: A text description of the desired image(s). The maximum length is 1000 + characters for `dall-e-2` and 4000 characters for `dall-e-3`. + + model: The model to use for image generation. + + n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only + `n=1` is supported. + + quality: The quality of the image that will be generated. `hd` creates images with finer + details and greater consistency across the image. This param is only supported + for `dall-e-3`. + + response_format: The format in which the generated images are returned. Must be one of `url` or + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. + + size: The size of the generated images. Must be one of `256x256`, `512x512`, or + `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or + `1024x1792` for `dall-e-3` models. + + style: The style of the generated images. Must be one of `vivid` or `natural`. Vivid + causes the model to lean towards generating hyper-real and dramatic images. + Natural causes the model to produce more natural, less hyper-real looking + images. This param is only supported for `dall-e-3`. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/images/generations", + body=await async_maybe_transform( + { + "prompt": prompt, + "model": model, + "n": n, + "quality": quality, + "response_format": response_format, + "size": size, + "style": style, + "user": user, + }, + image_generate_params.ImageGenerateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=ImagesResponse, + ) + + +class ImagesWithRawResponse: + def __init__(self, images: Images) -> None: + self._images = images + + self.create_variation = _legacy_response.to_raw_response_wrapper( + images.create_variation, + ) + self.edit = _legacy_response.to_raw_response_wrapper( + images.edit, + ) + self.generate = _legacy_response.to_raw_response_wrapper( + images.generate, + ) + + +class AsyncImagesWithRawResponse: + def __init__(self, images: AsyncImages) -> None: + self._images = images + + self.create_variation = _legacy_response.async_to_raw_response_wrapper( + images.create_variation, + ) + self.edit = _legacy_response.async_to_raw_response_wrapper( + images.edit, + ) + self.generate = _legacy_response.async_to_raw_response_wrapper( + images.generate, + ) + + +class ImagesWithStreamingResponse: + def __init__(self, images: Images) -> None: + self._images = images + + self.create_variation = to_streamed_response_wrapper( + images.create_variation, + ) + self.edit = to_streamed_response_wrapper( + images.edit, + ) + self.generate = to_streamed_response_wrapper( + images.generate, + ) + + +class AsyncImagesWithStreamingResponse: + def __init__(self, images: AsyncImages) -> None: + self._images = images + + self.create_variation = async_to_streamed_response_wrapper( + images.create_variation, + ) + self.edit = async_to_streamed_response_wrapper( + images.edit, + ) + self.generate = async_to_streamed_response_wrapper( + images.generate, + ) diff --git a/portkey_ai/_vendor/openai/resources/models.py b/portkey_ai/_vendor/openai/resources/models.py new file mode 100644 index 00000000..2d32ca9f --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/models.py @@ -0,0 +1,310 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import httpx + +from .. import _legacy_response +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..pagination import SyncPage, AsyncPage +from ..types.model import Model +from .._base_client import ( + AsyncPaginator, + make_request_options, +) +from ..types.model_deleted import ModelDeleted + +__all__ = ["Models", "AsyncModels"] + + +class Models(SyncAPIResource): + @cached_property + def with_raw_response(self) -> ModelsWithRawResponse: + return ModelsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ModelsWithStreamingResponse: + return ModelsWithStreamingResponse(self) + + def retrieve( + self, + model: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Model: + """ + Retrieves a model instance, providing basic information about the model such as + the owner and permissioning. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not model: + raise ValueError( + f"Expected a non-empty value for `model` but received {model!r}" + ) + return self._get( + f"/models/{model}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Model, + ) + + def list( + self, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncPage[Model]: + """ + Lists the currently available models, and provides basic information about each + one such as the owner and availability. + """ + return self._get_api_list( + "/models", + page=SyncPage[Model], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + model=Model, + ) + + def delete( + self, + model: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ModelDeleted: + """Delete a fine-tuned model. + + You must have the Owner role in your organization to + delete a model. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not model: + raise ValueError( + f"Expected a non-empty value for `model` but received {model!r}" + ) + return self._delete( + f"/models/{model}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=ModelDeleted, + ) + + +class AsyncModels(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncModelsWithRawResponse: + return AsyncModelsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncModelsWithStreamingResponse: + return AsyncModelsWithStreamingResponse(self) + + async def retrieve( + self, + model: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Model: + """ + Retrieves a model instance, providing basic information about the model such as + the owner and permissioning. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not model: + raise ValueError( + f"Expected a non-empty value for `model` but received {model!r}" + ) + return await self._get( + f"/models/{model}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=Model, + ) + + def list( + self, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[Model, AsyncPage[Model]]: + """ + Lists the currently available models, and provides basic information about each + one such as the owner and availability. + """ + return self._get_api_list( + "/models", + page=AsyncPage[Model], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + model=Model, + ) + + async def delete( + self, + model: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ModelDeleted: + """Delete a fine-tuned model. + + You must have the Owner role in your organization to + delete a model. 
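+
+        Illustrative example (a sketch; assumes a configured `AsyncOpenAI`-style
+        client and a hypothetical fine-tuned model id):
+
+            deleted = await client.models.delete("ft:gpt-3.5-turbo:acme::abc123")
+            assert deleted.deleted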
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not model: + raise ValueError( + f"Expected a non-empty value for `model` but received {model!r}" + ) + return await self._delete( + f"/models/{model}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=ModelDeleted, + ) + + +class ModelsWithRawResponse: + def __init__(self, models: Models) -> None: + self._models = models + + self.retrieve = _legacy_response.to_raw_response_wrapper( + models.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + models.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + models.delete, + ) + + +class AsyncModelsWithRawResponse: + def __init__(self, models: AsyncModels) -> None: + self._models = models + + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + models.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + models.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + models.delete, + ) + + +class ModelsWithStreamingResponse: + def __init__(self, models: Models) -> None: + self._models = models + + self.retrieve = to_streamed_response_wrapper( + models.retrieve, + ) + self.list = to_streamed_response_wrapper( + models.list, + ) + self.delete = to_streamed_response_wrapper( + models.delete, + ) + + +class AsyncModelsWithStreamingResponse: + def __init__(self, models: AsyncModels) -> None: + self._models = models + + self.retrieve = async_to_streamed_response_wrapper( + models.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + models.list, + ) + self.delete = async_to_streamed_response_wrapper( + models.delete, + ) diff --git a/portkey_ai/_vendor/openai/resources/moderations.py b/portkey_ai/_vendor/openai/resources/moderations.py new file mode 100644 index 00000000..95926bdd --- /dev/null +++ b/portkey_ai/_vendor/openai/resources/moderations.py @@ -0,0 +1,189 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union +from typing_extensions import Literal + +import httpx + +from .. 
import _legacy_response +from ..types import moderation_create_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import ( + maybe_transform, + async_maybe_transform, +) +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from .._base_client import ( + make_request_options, +) +from ..types.moderation_create_response import ModerationCreateResponse + +__all__ = ["Moderations", "AsyncModerations"] + + +class Moderations(SyncAPIResource): + @cached_property + def with_raw_response(self) -> ModerationsWithRawResponse: + return ModerationsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ModerationsWithStreamingResponse: + return ModerationsWithStreamingResponse(self) + + def create( + self, + *, + input: Union[str, List[str]], + model: Union[str, Literal["text-moderation-latest", "text-moderation-stable"]] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ModerationCreateResponse: + """ + Classifies if text is potentially harmful. + + Args: + input: The input text to classify + + model: Two content moderations models are available: `text-moderation-stable` and + `text-moderation-latest`. + + The default is `text-moderation-latest` which will be automatically upgraded + over time. This ensures you are always using our most accurate model. If you use + `text-moderation-stable`, we will provide advanced notice before updating the + model. Accuracy of `text-moderation-stable` may be slightly lower than for + `text-moderation-latest`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/moderations", + body=maybe_transform( + { + "input": input, + "model": model, + }, + moderation_create_params.ModerationCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=ModerationCreateResponse, + ) + + +class AsyncModerations(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncModerationsWithRawResponse: + return AsyncModerationsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncModerationsWithStreamingResponse: + return AsyncModerationsWithStreamingResponse(self) + + async def create( + self, + *, + input: Union[str, List[str]], + model: Union[str, Literal["text-moderation-latest", "text-moderation-stable"]] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ModerationCreateResponse: + """ + Classifies if text is potentially harmful. + + Args: + input: The input text to classify + + model: Two content moderations models are available: `text-moderation-stable` and + `text-moderation-latest`. + + The default is `text-moderation-latest` which will be automatically upgraded + over time. This ensures you are always using our most accurate model. If you use + `text-moderation-stable`, we will provide advanced notice before updating the + model. Accuracy of `text-moderation-stable` may be slightly lower than for + `text-moderation-latest`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/moderations", + body=await async_maybe_transform( + { + "input": input, + "model": model, + }, + moderation_create_params.ModerationCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ), + cast_to=ModerationCreateResponse, + ) + + +class ModerationsWithRawResponse: + def __init__(self, moderations: Moderations) -> None: + self._moderations = moderations + + self.create = _legacy_response.to_raw_response_wrapper( + moderations.create, + ) + + +class AsyncModerationsWithRawResponse: + def __init__(self, moderations: AsyncModerations) -> None: + self._moderations = moderations + + self.create = _legacy_response.async_to_raw_response_wrapper( + moderations.create, + ) + + +class ModerationsWithStreamingResponse: + def __init__(self, moderations: Moderations) -> None: + self._moderations = moderations + + self.create = to_streamed_response_wrapper( + moderations.create, + ) + + +class AsyncModerationsWithStreamingResponse: + def __init__(self, moderations: AsyncModerations) -> None: + self._moderations = moderations + + self.create = async_to_streamed_response_wrapper( + moderations.create, + ) diff --git a/portkey_ai/_vendor/openai/types/__init__.py b/portkey_ai/_vendor/openai/types/__init__.py new file mode 100644 index 00000000..be02f633 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/__init__.py @@ -0,0 +1,43 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
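+#
+# Illustrative usage note (a sketch, not exhaustive): the re-exports below make the
+# vendored types importable from the package root, e.g.
+#
+#     from portkey_ai._vendor.openai.types import Batch, ImagesResponse
+#
+# The exact import path depends on how the vendored package is exposed.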
+ +from __future__ import annotations + +from .batch import Batch as Batch +from .image import Image as Image +from .model import Model as Model +from .shared import ( + ErrorObject as ErrorObject, + FunctionDefinition as FunctionDefinition, + FunctionParameters as FunctionParameters, +) +from .embedding import Embedding as Embedding +from .chat_model import ChatModel as ChatModel +from .completion import Completion as Completion +from .moderation import Moderation as Moderation +from .batch_error import BatchError as BatchError +from .file_object import FileObject as FileObject +from .file_content import FileContent as FileContent +from .file_deleted import FileDeleted as FileDeleted +from .model_deleted import ModelDeleted as ModelDeleted +from .images_response import ImagesResponse as ImagesResponse +from .completion_usage import CompletionUsage as CompletionUsage +from .file_list_params import FileListParams as FileListParams +from .batch_list_params import BatchListParams as BatchListParams +from .completion_choice import CompletionChoice as CompletionChoice +from .image_edit_params import ImageEditParams as ImageEditParams +from .file_create_params import FileCreateParams as FileCreateParams +from .batch_create_params import BatchCreateParams as BatchCreateParams +from .batch_request_counts import BatchRequestCounts as BatchRequestCounts +from .image_generate_params import ImageGenerateParams as ImageGenerateParams +from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams +from .completion_create_params import CompletionCreateParams as CompletionCreateParams +from .moderation_create_params import ModerationCreateParams as ModerationCreateParams +from .create_embedding_response import ( + CreateEmbeddingResponse as CreateEmbeddingResponse, +) +from .moderation_create_response import ( + ModerationCreateResponse as ModerationCreateResponse, +) +from .image_create_variation_params import ( + ImageCreateVariationParams as ImageCreateVariationParams, +) diff --git a/portkey_ai/_vendor/openai/types/audio/__init__.py b/portkey_ai/_vendor/openai/types/audio/__init__.py new file mode 100644 index 00000000..a7b3ca35 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/audio/__init__.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .translation import Translation as Translation +from .transcription import Transcription as Transcription +from .speech_create_params import SpeechCreateParams as SpeechCreateParams +from .translation_create_params import ( + TranslationCreateParams as TranslationCreateParams, +) +from .transcription_create_params import ( + TranscriptionCreateParams as TranscriptionCreateParams, +) diff --git a/portkey_ai/_vendor/openai/types/audio/speech_create_params.py b/portkey_ai/_vendor/openai/types/audio/speech_create_params.py new file mode 100644 index 00000000..8d75ec4c --- /dev/null +++ b/portkey_ai/_vendor/openai/types/audio/speech_create_params.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["SpeechCreateParams"] + + +class SpeechCreateParams(TypedDict, total=False): + input: Required[str] + """The text to generate audio for. 
The maximum length is 4096 characters.""" + + model: Required[Union[str, Literal["tts-1", "tts-1-hd"]]] + """ + One of the available [TTS models](https://platform.openai.com/docs/models/tts): + `tts-1` or `tts-1-hd` + """ + + voice: Required[Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"]] + """The voice to use when generating the audio. + + Supported voices are `alloy`, `echo`, `fable`, `onyx`, `nova`, and `shimmer`. + Previews of the voices are available in the + [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options). + """ + + response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] + """The format to audio in. + + Supported formats are `mp3`, `opus`, `aac`, `flac`, `wav`, and `pcm`. + """ + + speed: float + """The speed of the generated audio. + + Select a value from `0.25` to `4.0`. `1.0` is the default. + """ diff --git a/portkey_ai/_vendor/openai/types/audio/transcription.py b/portkey_ai/_vendor/openai/types/audio/transcription.py new file mode 100644 index 00000000..edb5f227 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/audio/transcription.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + +from ..._models import BaseModel + +__all__ = ["Transcription"] + + +class Transcription(BaseModel): + text: str + """The transcribed text.""" diff --git a/portkey_ai/_vendor/openai/types/audio/transcription_create_params.py b/portkey_ai/_vendor/openai/types/audio/transcription_create_params.py new file mode 100644 index 00000000..6b2d5bae --- /dev/null +++ b/portkey_ai/_vendor/openai/types/audio/transcription_create_params.py @@ -0,0 +1,65 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union +from typing_extensions import Literal, Required, TypedDict + +from ..._types import FileTypes + +__all__ = ["TranscriptionCreateParams"] + + +class TranscriptionCreateParams(TypedDict, total=False): + file: Required[FileTypes] + """ + The audio file object (not file name) to transcribe, in one of these formats: + flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + """ + + model: Required[Union[str, Literal["whisper-1"]]] + """ID of the model to use. + + Only `whisper-1` (which is powered by our open source Whisper V2 model) is + currently available. + """ + + language: str + """The language of the input audio. + + Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will + improve accuracy and latency. + """ + + prompt: str + """An optional text to guide the model's style or continue a previous audio + segment. + + The [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + should match the audio language. + """ + + response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] + """ + The format of the transcript output, in one of these options: `json`, `text`, + `srt`, `verbose_json`, or `vtt`. + """ + + temperature: float + """The sampling temperature, between 0 and 1. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. If set to 0, the model will use + [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. 
+ """ + + timestamp_granularities: List[Literal["word", "segment"]] + """The timestamp granularities to populate for this transcription. + + `response_format` must be set `verbose_json` to use timestamp granularities. + Either or both of these options are supported: `word`, or `segment`. Note: There + is no additional latency for segment timestamps, but generating word timestamps + incurs additional latency. + """ diff --git a/portkey_ai/_vendor/openai/types/audio/translation.py b/portkey_ai/_vendor/openai/types/audio/translation.py new file mode 100644 index 00000000..7c0e9051 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/audio/translation.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + +from ..._models import BaseModel + +__all__ = ["Translation"] + + +class Translation(BaseModel): + text: str diff --git a/portkey_ai/_vendor/openai/types/audio/translation_create_params.py b/portkey_ai/_vendor/openai/types/audio/translation_create_params.py new file mode 100644 index 00000000..f23a41ed --- /dev/null +++ b/portkey_ai/_vendor/openai/types/audio/translation_create_params.py @@ -0,0 +1,48 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, Required, TypedDict + +from ..._types import FileTypes + +__all__ = ["TranslationCreateParams"] + + +class TranslationCreateParams(TypedDict, total=False): + file: Required[FileTypes] + """ + The audio file object (not file name) translate, in one of these formats: flac, + mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + """ + + model: Required[Union[str, Literal["whisper-1"]]] + """ID of the model to use. + + Only `whisper-1` (which is powered by our open source Whisper V2 model) is + currently available. + """ + + prompt: str + """An optional text to guide the model's style or continue a previous audio + segment. + + The [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + should be in English. + """ + + response_format: str + """ + The format of the transcript output, in one of these options: `json`, `text`, + `srt`, `verbose_json`, or `vtt`. + """ + + temperature: float + """The sampling temperature, between 0 and 1. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. If set to 0, the model will use + [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. + """ diff --git a/portkey_ai/_vendor/openai/types/batch.py b/portkey_ai/_vendor/openai/types/batch.py new file mode 100644 index 00000000..916e1d21 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/batch.py @@ -0,0 +1,92 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +import builtins +from typing import List, Optional +from typing_extensions import Literal + +from .._models import BaseModel +from .batch_error import BatchError +from .batch_request_counts import BatchRequestCounts + +__all__ = ["Batch", "Errors"] + + +class Errors(BaseModel): + data: Optional[List[BatchError]] = None + + object: Optional[str] = None + """The object type, which is always `list`.""" + + +class Batch(BaseModel): + id: str + + completion_window: str + """The time frame within which the batch should be processed.""" + + created_at: int + """The Unix timestamp (in seconds) for when the batch was created.""" + + endpoint: str + """The OpenAI API endpoint used by the batch.""" + + input_file_id: str + """The ID of the input file for the batch.""" + + object: Literal["batch"] + """The object type, which is always `batch`.""" + + status: Literal[ + "validating", + "failed", + "in_progress", + "finalizing", + "completed", + "expired", + "cancelling", + "cancelled", + ] + """The current status of the batch.""" + + cancelled_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch was cancelled.""" + + cancelling_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch started cancelling.""" + + completed_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch was completed.""" + + error_file_id: Optional[str] = None + """The ID of the file containing the outputs of requests with errors.""" + + errors: Optional[Errors] = None + + expired_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch expired.""" + + expires_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch will expire.""" + + failed_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch failed.""" + + finalizing_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch started finalizing.""" + + in_progress_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch started processing.""" + + metadata: Optional[builtins.object] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + output_file_id: Optional[str] = None + """The ID of the file containing the outputs of successfully executed requests.""" + + request_counts: Optional[BatchRequestCounts] = None + """The request counts for different statuses within the batch.""" diff --git a/portkey_ai/_vendor/openai/types/batch_create_params.py b/portkey_ai/_vendor/openai/types/batch_create_params.py new file mode 100644 index 00000000..63b4fae9 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/batch_create_params.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["BatchCreateParams"] + + +class BatchCreateParams(TypedDict, total=False): + completion_window: Required[Literal["24h"]] + """The time frame within which the batch should be processed. + + Currently only `24h` is supported. + """ + + endpoint: Required[Literal["/v1/chat/completions", "/v1/embeddings"]] + """The endpoint to be used for all requests in the batch. 
+ + Currently `/v1/chat/completions` and `/v1/embeddings` are supported. + """ + + input_file_id: Required[str] + """The ID of an uploaded file that contains requests for the new batch. + + See [upload file](https://platform.openai.com/docs/api-reference/files/create) + for how to upload a file. + + Your input file must be formatted as a + [JSONL file](https://platform.openai.com/docs/api-reference/batch/requestInput), + and must be uploaded with the purpose `batch`. + """ + + metadata: Optional[Dict[str, str]] + """Optional custom metadata for the batch.""" diff --git a/portkey_ai/_vendor/openai/types/batch_error.py b/portkey_ai/_vendor/openai/types/batch_error.py new file mode 100644 index 00000000..1cdd808d --- /dev/null +++ b/portkey_ai/_vendor/openai/types/batch_error.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from .._models import BaseModel + +__all__ = ["BatchError"] + + +class BatchError(BaseModel): + code: Optional[str] = None + """An error code identifying the error type.""" + + line: Optional[int] = None + """The line number of the input file where the error occurred, if applicable.""" + + message: Optional[str] = None + """A human-readable message providing more details about the error.""" + + param: Optional[str] = None + """The name of the parameter that caused the error, if applicable.""" diff --git a/portkey_ai/_vendor/openai/types/batch_list_params.py b/portkey_ai/_vendor/openai/types/batch_list_params.py new file mode 100644 index 00000000..ef5e966b --- /dev/null +++ b/portkey_ai/_vendor/openai/types/batch_list_params.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["BatchListParams"] + + +class BatchListParams(TypedDict, total=False): + after: str + """A cursor for use in pagination. + + `after` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include after=obj_foo in order to fetch the next page of the + list. + """ + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 100, and the default is 20. + """ diff --git a/portkey_ai/_vendor/openai/types/batch_request_counts.py b/portkey_ai/_vendor/openai/types/batch_request_counts.py new file mode 100644 index 00000000..7e1d49fb --- /dev/null +++ b/portkey_ai/_vendor/openai/types/batch_request_counts.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + +from .._models import BaseModel + +__all__ = ["BatchRequestCounts"] + + +class BatchRequestCounts(BaseModel): + completed: int + """Number of requests that have been completed successfully.""" + + failed: int + """Number of requests that have failed.""" + + total: int + """Total number of requests in the batch.""" diff --git a/portkey_ai/_vendor/openai/types/beta/__init__.py b/portkey_ai/_vendor/openai/types/beta/__init__.py new file mode 100644 index 00000000..80d6edce --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/__init__.py @@ -0,0 +1,64 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
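+#
+# Illustrative note (a sketch): the re-exports below make the beta types importable
+# from this subpackage, e.g.
+#
+#     from portkey_ai._vendor.openai.types.beta import Assistant, Thread
+#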
+ +from __future__ import annotations + +from .thread import Thread as Thread +from .assistant import Assistant as Assistant +from .vector_store import VectorStore as VectorStore +from .function_tool import FunctionTool as FunctionTool +from .assistant_tool import AssistantTool as AssistantTool +from .thread_deleted import ThreadDeleted as ThreadDeleted +from .file_search_tool import FileSearchTool as FileSearchTool +from .assistant_deleted import AssistantDeleted as AssistantDeleted +from .function_tool_param import FunctionToolParam as FunctionToolParam +from .assistant_tool_param import AssistantToolParam as AssistantToolParam +from .thread_create_params import ThreadCreateParams as ThreadCreateParams +from .thread_update_params import ThreadUpdateParams as ThreadUpdateParams +from .vector_store_deleted import VectorStoreDeleted as VectorStoreDeleted +from .assistant_list_params import AssistantListParams as AssistantListParams +from .assistant_tool_choice import AssistantToolChoice as AssistantToolChoice +from .code_interpreter_tool import CodeInterpreterTool as CodeInterpreterTool +from .assistant_stream_event import AssistantStreamEvent as AssistantStreamEvent +from .file_search_tool_param import FileSearchToolParam as FileSearchToolParam +from .assistant_create_params import AssistantCreateParams as AssistantCreateParams +from .assistant_update_params import AssistantUpdateParams as AssistantUpdateParams +from .vector_store_list_params import VectorStoreListParams as VectorStoreListParams +from .assistant_response_format import ( + AssistantResponseFormat as AssistantResponseFormat, +) +from .vector_store_create_params import ( + VectorStoreCreateParams as VectorStoreCreateParams, +) +from .vector_store_update_params import ( + VectorStoreUpdateParams as VectorStoreUpdateParams, +) +from .assistant_tool_choice_param import ( + AssistantToolChoiceParam as AssistantToolChoiceParam, +) +from .code_interpreter_tool_param import ( + CodeInterpreterToolParam as CodeInterpreterToolParam, +) +from .assistant_tool_choice_option import ( + AssistantToolChoiceOption as AssistantToolChoiceOption, +) +from .thread_create_and_run_params import ( + ThreadCreateAndRunParams as ThreadCreateAndRunParams, +) +from .assistant_tool_choice_function import ( + AssistantToolChoiceFunction as AssistantToolChoiceFunction, +) +from .assistant_response_format_param import ( + AssistantResponseFormatParam as AssistantResponseFormatParam, +) +from .assistant_response_format_option import ( + AssistantResponseFormatOption as AssistantResponseFormatOption, +) +from .assistant_tool_choice_option_param import ( + AssistantToolChoiceOptionParam as AssistantToolChoiceOptionParam, +) +from .assistant_tool_choice_function_param import ( + AssistantToolChoiceFunctionParam as AssistantToolChoiceFunctionParam, +) +from .assistant_response_format_option_param import ( + AssistantResponseFormatOptionParam as AssistantResponseFormatOptionParam, +) diff --git a/portkey_ai/_vendor/openai/types/beta/assistant.py b/portkey_ai/_vendor/openai/types/beta/assistant.py new file mode 100644 index 00000000..e94cb92d --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/assistant.py @@ -0,0 +1,131 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .assistant_tool import AssistantTool +from .assistant_response_format_option import AssistantResponseFormatOption + +__all__ = [ + "Assistant", + "ToolResources", + "ToolResourcesCodeInterpreter", + "ToolResourcesFileSearch", +] + + +class ToolResourcesCodeInterpreter(BaseModel): + file_ids: Optional[List[str]] = None + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter`` tool. There can be a maximum of 20 files + associated with the tool. + """ + + +class ToolResourcesFileSearch(BaseModel): + vector_store_ids: Optional[List[str]] = None + """ + The ID of the + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this assistant. There can be a maximum of 1 vector store attached to + the assistant. + """ + + +class ToolResources(BaseModel): + code_interpreter: Optional[ToolResourcesCodeInterpreter] = None + + file_search: Optional[ToolResourcesFileSearch] = None + + +class Assistant(BaseModel): + id: str + """The identifier, which can be referenced in API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the assistant was created.""" + + description: Optional[str] = None + """The description of the assistant. The maximum length is 512 characters.""" + + instructions: Optional[str] = None + """The system instructions that the assistant uses. + + The maximum length is 256,000 characters. + """ + + metadata: Optional[object] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + model: str + """ID of the model to use. + + You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. + """ + + name: Optional[str] = None + """The name of the assistant. The maximum length is 256 characters.""" + + object: Literal["assistant"] + """The object type, which is always `assistant`.""" + + tools: List[AssistantTool] + """A list of tool enabled on the assistant. + + There can be a maximum of 128 tools per assistant. Tools can be of types + `code_interpreter`, `file_search`, or `function`. + """ + + response_format: Optional[AssistantResponseFormatOption] = None + """Specifies the format that the model must output. + + Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. 
+ """ + + temperature: Optional[float] = None + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. + """ + + tool_resources: Optional[ToolResources] = None + """A set of resources that are used by the assistant's tools. + + The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ + + top_p: Optional[float] = None + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + """ diff --git a/portkey_ai/_vendor/openai/types/beta/assistant_create_params.py b/portkey_ai/_vendor/openai/types/beta/assistant_create_params.py new file mode 100644 index 00000000..e9ff66df --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/assistant_create_params.py @@ -0,0 +1,173 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +from .assistant_tool_param import AssistantToolParam +from .assistant_response_format_option_param import AssistantResponseFormatOptionParam + +__all__ = [ + "AssistantCreateParams", + "ToolResources", + "ToolResourcesCodeInterpreter", + "ToolResourcesFileSearch", + "ToolResourcesFileSearchVectorStore", +] + + +class AssistantCreateParams(TypedDict, total=False): + model: Required[ + Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + ] + ] + """ID of the model to use. + + You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. + """ + + description: Optional[str] + """The description of the assistant. The maximum length is 512 characters.""" + + instructions: Optional[str] + """The system instructions that the assistant uses. + + The maximum length is 256,000 characters. + """ + + metadata: Optional[object] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + name: Optional[str] + """The name of the assistant. The maximum length is 256 characters.""" + + response_format: Optional[AssistantResponseFormatOptionParam] + """Specifies the format that the model must output. + + Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. 
+ + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + """ + + temperature: Optional[float] + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. + """ + + tool_resources: Optional[ToolResources] + """A set of resources that are used by the assistant's tools. + + The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ + + tools: Iterable[AssistantToolParam] + """A list of tool enabled on the assistant. + + There can be a maximum of 128 tools per assistant. Tools can be of types + `code_interpreter`, `file_search`, or `function`. + """ + + top_p: Optional[float] + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + """ + + +class ToolResourcesCodeInterpreter(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + """ + + +class ToolResourcesFileSearchVectorStore(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to + add to the vector store. There can be a maximum of 10000 files in a vector + store. + """ + + metadata: object + """Set of 16 key-value pairs that can be attached to a vector store. + + This can be useful for storing additional information about the vector store in + a structured format. Keys can be a maximum of 64 characters long and values can + be a maxium of 512 characters long. + """ + + +class ToolResourcesFileSearch(TypedDict, total=False): + vector_store_ids: List[str] + """ + The + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this assistant. There can be a maximum of 1 vector store attached to + the assistant. + """ + + vector_stores: Iterable[ToolResourcesFileSearchVectorStore] + """ + A helper to create a + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + with file_ids and attach it to this assistant. There can be a maximum of 1 + vector store attached to the assistant. 
+ """ + + +class ToolResources(TypedDict, total=False): + code_interpreter: ToolResourcesCodeInterpreter + + file_search: ToolResourcesFileSearch diff --git a/portkey_ai/_vendor/openai/types/beta/assistant_deleted.py b/portkey_ai/_vendor/openai/types/beta/assistant_deleted.py new file mode 100644 index 00000000..3be40cd6 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/assistant_deleted.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["AssistantDeleted"] + + +class AssistantDeleted(BaseModel): + id: str + + deleted: bool + + object: Literal["assistant.deleted"] diff --git a/portkey_ai/_vendor/openai/types/beta/assistant_list_params.py b/portkey_ai/_vendor/openai/types/beta/assistant_list_params.py new file mode 100644 index 00000000..f54f6312 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/assistant_list_params.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["AssistantListParams"] + + +class AssistantListParams(TypedDict, total=False): + after: str + """A cursor for use in pagination. + + `after` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include after=obj_foo in order to fetch the next page of the + list. + """ + + before: str + """A cursor for use in pagination. + + `before` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include before=obj_foo in order to fetch the previous page + of the list. + """ + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 100, and the default is 20. + """ + + order: Literal["asc", "desc"] + """Sort order by the `created_at` timestamp of the objects. + + `asc` for ascending order and `desc` for descending order. + """ diff --git a/portkey_ai/_vendor/openai/types/beta/assistant_response_format.py b/portkey_ai/_vendor/openai/types/beta/assistant_response_format.py new file mode 100644 index 00000000..f53bdaf6 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/assistant_response_format.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["AssistantResponseFormat"] + + +class AssistantResponseFormat(BaseModel): + type: Optional[Literal["text", "json_object"]] = None + """Must be one of `text` or `json_object`.""" diff --git a/portkey_ai/_vendor/openai/types/beta/assistant_response_format_option.py b/portkey_ai/_vendor/openai/types/beta/assistant_response_format_option.py new file mode 100644 index 00000000..d4e05e0e --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/assistant_response_format_option.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Union +from typing_extensions import Literal + +from .assistant_response_format import AssistantResponseFormat + +__all__ = ["AssistantResponseFormatOption"] + +AssistantResponseFormatOption = Union[Literal["none", "auto"], AssistantResponseFormat] diff --git a/portkey_ai/_vendor/openai/types/beta/assistant_response_format_option_param.py b/portkey_ai/_vendor/openai/types/beta/assistant_response_format_option_param.py new file mode 100644 index 00000000..cc8fca7b --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/assistant_response_format_option_param.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal + +from .assistant_response_format_param import AssistantResponseFormatParam + +__all__ = ["AssistantResponseFormatOptionParam"] + +AssistantResponseFormatOptionParam = Union[ + Literal["none", "auto"], AssistantResponseFormatParam +] diff --git a/portkey_ai/_vendor/openai/types/beta/assistant_response_format_param.py b/portkey_ai/_vendor/openai/types/beta/assistant_response_format_param.py new file mode 100644 index 00000000..96e1d021 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/assistant_response_format_param.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["AssistantResponseFormatParam"] + + +class AssistantResponseFormatParam(TypedDict, total=False): + type: Literal["text", "json_object"] + """Must be one of `text` or `json_object`.""" diff --git a/portkey_ai/_vendor/openai/types/beta/assistant_stream_event.py b/portkey_ai/_vendor/openai/types/beta/assistant_stream_event.py new file mode 100644 index 00000000..91925e93 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/assistant_stream_event.py @@ -0,0 +1,279 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal, Annotated + +from .thread import Thread +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .threads.run import Run +from .threads.message import Message +from ..shared.error_object import ErrorObject +from .threads.runs.run_step import RunStep +from .threads.message_delta_event import MessageDeltaEvent +from .threads.runs.run_step_delta_event import RunStepDeltaEvent + +__all__ = [ + "AssistantStreamEvent", + "ThreadCreated", + "ThreadRunCreated", + "ThreadRunQueued", + "ThreadRunInProgress", + "ThreadRunRequiresAction", + "ThreadRunCompleted", + "ThreadRunFailed", + "ThreadRunCancelling", + "ThreadRunCancelled", + "ThreadRunExpired", + "ThreadRunStepCreated", + "ThreadRunStepInProgress", + "ThreadRunStepDelta", + "ThreadRunStepCompleted", + "ThreadRunStepFailed", + "ThreadRunStepCancelled", + "ThreadRunStepExpired", + "ThreadMessageCreated", + "ThreadMessageInProgress", + "ThreadMessageDelta", + "ThreadMessageCompleted", + "ThreadMessageIncomplete", + "ErrorEvent", +] + + +class ThreadCreated(BaseModel): + data: Thread + """ + Represents a thread that contains + [messages](https://platform.openai.com/docs/api-reference/messages). + """ + + event: Literal["thread.created"] + + +class ThreadRunCreated(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). 
+ """ + + event: Literal["thread.run.created"] + + +class ThreadRunQueued(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.queued"] + + +class ThreadRunInProgress(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.in_progress"] + + +class ThreadRunRequiresAction(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.requires_action"] + + +class ThreadRunCompleted(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.completed"] + + +class ThreadRunFailed(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.failed"] + + +class ThreadRunCancelling(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.cancelling"] + + +class ThreadRunCancelled(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.cancelled"] + + +class ThreadRunExpired(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.expired"] + + +class ThreadRunStepCreated(BaseModel): + data: RunStep + """Represents a step in execution of a run.""" + + event: Literal["thread.run.step.created"] + + +class ThreadRunStepInProgress(BaseModel): + data: RunStep + """Represents a step in execution of a run.""" + + event: Literal["thread.run.step.in_progress"] + + +class ThreadRunStepDelta(BaseModel): + data: RunStepDeltaEvent + """Represents a run step delta i.e. + + any changed fields on a run step during streaming. + """ + + event: Literal["thread.run.step.delta"] + + +class ThreadRunStepCompleted(BaseModel): + data: RunStep + """Represents a step in execution of a run.""" + + event: Literal["thread.run.step.completed"] + + +class ThreadRunStepFailed(BaseModel): + data: RunStep + """Represents a step in execution of a run.""" + + event: Literal["thread.run.step.failed"] + + +class ThreadRunStepCancelled(BaseModel): + data: RunStep + """Represents a step in execution of a run.""" + + event: Literal["thread.run.step.cancelled"] + + +class ThreadRunStepExpired(BaseModel): + data: RunStep + """Represents a step in execution of a run.""" + + event: Literal["thread.run.step.expired"] + + +class ThreadMessageCreated(BaseModel): + data: Message + """ + Represents a message within a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.message.created"] + + +class ThreadMessageInProgress(BaseModel): + data: Message + """ + Represents a message within a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.message.in_progress"] + + +class ThreadMessageDelta(BaseModel): + data: MessageDeltaEvent + """Represents a message delta i.e. + + any changed fields on a message during streaming. 
+ """ + + event: Literal["thread.message.delta"] + + +class ThreadMessageCompleted(BaseModel): + data: Message + """ + Represents a message within a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.message.completed"] + + +class ThreadMessageIncomplete(BaseModel): + data: Message + """ + Represents a message within a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.message.incomplete"] + + +class ErrorEvent(BaseModel): + data: ErrorObject + + event: Literal["error"] + + +AssistantStreamEvent = Annotated[ + Union[ + ThreadCreated, + ThreadRunCreated, + ThreadRunQueued, + ThreadRunInProgress, + ThreadRunRequiresAction, + ThreadRunCompleted, + ThreadRunFailed, + ThreadRunCancelling, + ThreadRunCancelled, + ThreadRunExpired, + ThreadRunStepCreated, + ThreadRunStepInProgress, + ThreadRunStepDelta, + ThreadRunStepCompleted, + ThreadRunStepFailed, + ThreadRunStepCancelled, + ThreadRunStepExpired, + ThreadMessageCreated, + ThreadMessageInProgress, + ThreadMessageDelta, + ThreadMessageCompleted, + ThreadMessageIncomplete, + ErrorEvent, + ], + PropertyInfo(discriminator="event"), +] diff --git a/portkey_ai/_vendor/openai/types/beta/assistant_tool.py b/portkey_ai/_vendor/openai/types/beta/assistant_tool.py new file mode 100644 index 00000000..e637ee53 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/assistant_tool.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated + +from ..._utils import PropertyInfo +from .function_tool import FunctionTool +from .file_search_tool import FileSearchTool +from .code_interpreter_tool import CodeInterpreterTool + +__all__ = ["AssistantTool"] + +AssistantTool = Annotated[ + Union[CodeInterpreterTool, FileSearchTool, FunctionTool], + PropertyInfo(discriminator="type"), +] diff --git a/portkey_ai/_vendor/openai/types/beta/assistant_tool_choice.py b/portkey_ai/_vendor/openai/types/beta/assistant_tool_choice.py new file mode 100644 index 00000000..d73439f0 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/assistant_tool_choice.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .assistant_tool_choice_function import AssistantToolChoiceFunction + +__all__ = ["AssistantToolChoice"] + + +class AssistantToolChoice(BaseModel): + type: Literal["function", "code_interpreter", "file_search"] + """The type of the tool. If type is `function`, the function name must be set""" + + function: Optional[AssistantToolChoiceFunction] = None diff --git a/portkey_ai/_vendor/openai/types/beta/assistant_tool_choice_function.py b/portkey_ai/_vendor/openai/types/beta/assistant_tool_choice_function.py new file mode 100644 index 00000000..0c896d80 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/assistant_tool_choice_function.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ + +from ..._models import BaseModel + +__all__ = ["AssistantToolChoiceFunction"] + + +class AssistantToolChoiceFunction(BaseModel): + name: str + """The name of the function to call.""" diff --git a/portkey_ai/_vendor/openai/types/beta/assistant_tool_choice_function_param.py b/portkey_ai/_vendor/openai/types/beta/assistant_tool_choice_function_param.py new file mode 100644 index 00000000..428857de --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/assistant_tool_choice_function_param.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +__all__ = ["AssistantToolChoiceFunctionParam"] + + +class AssistantToolChoiceFunctionParam(TypedDict, total=False): + name: Required[str] + """The name of the function to call.""" diff --git a/portkey_ai/_vendor/openai/types/beta/assistant_tool_choice_option.py b/portkey_ai/_vendor/openai/types/beta/assistant_tool_choice_option.py new file mode 100644 index 00000000..7bd69ae4 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/assistant_tool_choice_option.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal + +from .assistant_tool_choice import AssistantToolChoice + +__all__ = ["AssistantToolChoiceOption"] + +AssistantToolChoiceOption = Union[ + Literal["none", "auto", "required"], AssistantToolChoice +] diff --git a/portkey_ai/_vendor/openai/types/beta/assistant_tool_choice_option_param.py b/portkey_ai/_vendor/openai/types/beta/assistant_tool_choice_option_param.py new file mode 100644 index 00000000..0508e404 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/assistant_tool_choice_option_param.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal + +from .assistant_tool_choice_param import AssistantToolChoiceParam + +__all__ = ["AssistantToolChoiceOptionParam"] + +AssistantToolChoiceOptionParam = Union[ + Literal["none", "auto", "required"], AssistantToolChoiceParam +] diff --git a/portkey_ai/_vendor/openai/types/beta/assistant_tool_choice_param.py b/portkey_ai/_vendor/openai/types/beta/assistant_tool_choice_param.py new file mode 100644 index 00000000..904f489e --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/assistant_tool_choice_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from .assistant_tool_choice_function_param import AssistantToolChoiceFunctionParam + +__all__ = ["AssistantToolChoiceParam"] + + +class AssistantToolChoiceParam(TypedDict, total=False): + type: Required[Literal["function", "code_interpreter", "file_search"]] + """The type of the tool. If type is `function`, the function name must be set""" + + function: AssistantToolChoiceFunctionParam diff --git a/portkey_ai/_vendor/openai/types/beta/assistant_tool_param.py b/portkey_ai/_vendor/openai/types/beta/assistant_tool_param.py new file mode 100644 index 00000000..d0fedd03 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/assistant_tool_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Union + +from .function_tool_param import FunctionToolParam +from .file_search_tool_param import FileSearchToolParam +from .code_interpreter_tool_param import CodeInterpreterToolParam + +__all__ = ["AssistantToolParam"] + +AssistantToolParam = Union[ + CodeInterpreterToolParam, FileSearchToolParam, FunctionToolParam +] diff --git a/portkey_ai/_vendor/openai/types/beta/assistant_update_params.py b/portkey_ai/_vendor/openai/types/beta/assistant_update_params.py new file mode 100644 index 00000000..4bfecdce --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/assistant_update_params.py @@ -0,0 +1,124 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Iterable, Optional +from typing_extensions import TypedDict + +from .assistant_tool_param import AssistantToolParam +from .assistant_response_format_option_param import AssistantResponseFormatOptionParam + +__all__ = [ + "AssistantUpdateParams", + "ToolResources", + "ToolResourcesCodeInterpreter", + "ToolResourcesFileSearch", +] + + +class AssistantUpdateParams(TypedDict, total=False): + description: Optional[str] + """The description of the assistant. The maximum length is 512 characters.""" + + instructions: Optional[str] + """The system instructions that the assistant uses. + + The maximum length is 256,000 characters. + """ + + metadata: Optional[object] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + model: str + """ID of the model to use. + + You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. + """ + + name: Optional[str] + """The name of the assistant. The maximum length is 256 characters.""" + + response_format: Optional[AssistantResponseFormatOptionParam] + """Specifies the format that the model must output. + + Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + """ + + temperature: Optional[float] + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. + """ + + tool_resources: Optional[ToolResources] + """A set of resources that are used by the assistant's tools. + + The resources are specific to the type of tool. 
For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ + + tools: Iterable[AssistantToolParam] + """A list of tool enabled on the assistant. + + There can be a maximum of 128 tools per assistant. Tools can be of types + `code_interpreter`, `file_search`, or `function`. + """ + + top_p: Optional[float] + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + """ + + +class ToolResourcesCodeInterpreter(TypedDict, total=False): + file_ids: List[str] + """ + Overrides the list of + [file](https://platform.openai.com/docs/api-reference/files) IDs made available + to the `code_interpreter` tool. There can be a maximum of 20 files associated + with the tool. + """ + + +class ToolResourcesFileSearch(TypedDict, total=False): + vector_store_ids: List[str] + """ + Overrides the + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this assistant. There can be a maximum of 1 vector store attached to + the assistant. + """ + + +class ToolResources(TypedDict, total=False): + code_interpreter: ToolResourcesCodeInterpreter + + file_search: ToolResourcesFileSearch diff --git a/portkey_ai/_vendor/openai/types/beta/chat/__init__.py b/portkey_ai/_vendor/openai/types/beta/chat/__init__.py new file mode 100644 index 00000000..f8ee8b14 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/chat/__init__.py @@ -0,0 +1,3 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations diff --git a/portkey_ai/_vendor/openai/types/beta/code_interpreter_tool.py b/portkey_ai/_vendor/openai/types/beta/code_interpreter_tool.py new file mode 100644 index 00000000..17ab3de6 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/code_interpreter_tool.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["CodeInterpreterTool"] + + +class CodeInterpreterTool(BaseModel): + type: Literal["code_interpreter"] + """The type of tool being defined: `code_interpreter`""" diff --git a/portkey_ai/_vendor/openai/types/beta/code_interpreter_tool_param.py b/portkey_ai/_vendor/openai/types/beta/code_interpreter_tool_param.py new file mode 100644 index 00000000..4f6916d7 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/code_interpreter_tool_param.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["CodeInterpreterToolParam"] + + +class CodeInterpreterToolParam(TypedDict, total=False): + type: Required[Literal["code_interpreter"]] + """The type of tool being defined: `code_interpreter`""" diff --git a/portkey_ai/_vendor/openai/types/beta/file_search_tool.py b/portkey_ai/_vendor/openai/types/beta/file_search_tool.py new file mode 100644 index 00000000..eea55ea6 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/file_search_tool.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["FileSearchTool"] + + +class FileSearchTool(BaseModel): + type: Literal["file_search"] + """The type of tool being defined: `file_search`""" diff --git a/portkey_ai/_vendor/openai/types/beta/file_search_tool_param.py b/portkey_ai/_vendor/openai/types/beta/file_search_tool_param.py new file mode 100644 index 00000000..d33fd06d --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/file_search_tool_param.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["FileSearchToolParam"] + + +class FileSearchToolParam(TypedDict, total=False): + type: Required[Literal["file_search"]] + """The type of tool being defined: `file_search`""" diff --git a/portkey_ai/_vendor/openai/types/beta/function_tool.py b/portkey_ai/_vendor/openai/types/beta/function_tool.py new file mode 100644 index 00000000..f9227678 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/function_tool.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from ..shared.function_definition import FunctionDefinition + +__all__ = ["FunctionTool"] + + +class FunctionTool(BaseModel): + function: FunctionDefinition + + type: Literal["function"] + """The type of tool being defined: `function`""" diff --git a/portkey_ai/_vendor/openai/types/beta/function_tool_param.py b/portkey_ai/_vendor/openai/types/beta/function_tool_param.py new file mode 100644 index 00000000..b44c0d47 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/function_tool_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from ...types import shared_params + +__all__ = ["FunctionToolParam"] + + +class FunctionToolParam(TypedDict, total=False): + function: Required[shared_params.FunctionDefinition] + + type: Required[Literal["function"]] + """The type of tool being defined: `function`""" diff --git a/portkey_ai/_vendor/openai/types/beta/thread.py b/portkey_ai/_vendor/openai/types/beta/thread.py new file mode 100644 index 00000000..a01f4073 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/thread.py @@ -0,0 +1,65 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = [ + "Thread", + "ToolResources", + "ToolResourcesCodeInterpreter", + "ToolResourcesFileSearch", +] + + +class ToolResourcesCodeInterpreter(BaseModel): + file_ids: Optional[List[str]] = None + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + """ + + +class ToolResourcesFileSearch(BaseModel): + vector_store_ids: Optional[List[str]] = None + """ + The + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this thread. There can be a maximum of 1 vector store attached to + the thread. 
+ """ + + +class ToolResources(BaseModel): + code_interpreter: Optional[ToolResourcesCodeInterpreter] = None + + file_search: Optional[ToolResourcesFileSearch] = None + + +class Thread(BaseModel): + id: str + """The identifier, which can be referenced in API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the thread was created.""" + + metadata: Optional[object] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + object: Literal["thread"] + """The object type, which is always `thread`.""" + + tool_resources: Optional[ToolResources] = None + """ + A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ diff --git a/portkey_ai/_vendor/openai/types/beta/thread_create_and_run_params.py b/portkey_ai/_vendor/openai/types/beta/thread_create_and_run_params.py new file mode 100644 index 00000000..13d14df3 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/thread_create_and_run_params.py @@ -0,0 +1,350 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +from .function_tool_param import FunctionToolParam +from .file_search_tool_param import FileSearchToolParam +from .code_interpreter_tool_param import CodeInterpreterToolParam +from .assistant_tool_choice_option_param import AssistantToolChoiceOptionParam +from .assistant_response_format_option_param import AssistantResponseFormatOptionParam + +__all__ = [ + "ThreadCreateAndRunParamsBase", + "Thread", + "ThreadMessage", + "ThreadMessageAttachment", + "ThreadMessageAttachmentTool", + "ThreadToolResources", + "ThreadToolResourcesCodeInterpreter", + "ThreadToolResourcesFileSearch", + "ThreadToolResourcesFileSearchVectorStore", + "ToolResources", + "ToolResourcesCodeInterpreter", + "ToolResourcesFileSearch", + "Tool", + "TruncationStrategy", + "ThreadCreateAndRunParamsNonStreaming", + "ThreadCreateAndRunParamsStreaming", +] + + +class ThreadCreateAndRunParamsBase(TypedDict, total=False): + assistant_id: Required[str] + """ + The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + """ + + instructions: Optional[str] + """Override the default system message of the assistant. + + This is useful for modifying the behavior on a per-run basis. + """ + + max_completion_tokens: Optional[int] + """ + The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + """ + + max_prompt_tokens: Optional[int] + """The maximum number of prompt tokens that may be used over the course of the run. + + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. 
If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + """ + + metadata: Optional[object] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + """ + The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + """ + + response_format: Optional[AssistantResponseFormatOptionParam] + """Specifies the format that the model must output. + + Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + """ + + temperature: Optional[float] + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. + """ + + thread: Thread + """If no thread is provided, an empty thread will be created.""" + + tool_choice: Optional[AssistantToolChoiceOptionParam] + """ + Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + """ + + tool_resources: Optional[ToolResources] + """A set of resources that are used by the assistant's tools. + + The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ + + tools: Optional[Iterable[Tool]] + """Override the tools the assistant can use for this run. + + This is useful for modifying the behavior on a per-run basis. 
+ """ + + top_p: Optional[float] + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + """ + + truncation_strategy: Optional[TruncationStrategy] + """Controls for how a thread will be truncated prior to the run. + + Use this to control the intial context window of the run. + """ + + +ThreadMessageAttachmentTool = Union[CodeInterpreterToolParam, FileSearchToolParam] + + +class ThreadMessageAttachment(TypedDict, total=False): + file_id: str + """The ID of the file to attach to the message.""" + + tools: Iterable[ThreadMessageAttachmentTool] + """The tools to add this file to.""" + + +class ThreadMessage(TypedDict, total=False): + content: Required[str] + """The content of the message.""" + + role: Required[Literal["user", "assistant"]] + """The role of the entity that is creating the message. Allowed values include: + + - `user`: Indicates the message is sent by an actual user and should be used in + most cases to represent user-generated messages. + - `assistant`: Indicates the message is generated by the assistant. Use this + value to insert messages from the assistant into the conversation. + """ + + attachments: Optional[Iterable[ThreadMessageAttachment]] + """A list of files attached to the message, and the tools they should be added to.""" + + metadata: Optional[object] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + +class ThreadToolResourcesCodeInterpreter(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + """ + + +class ThreadToolResourcesFileSearchVectorStore(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to + add to the vector store. There can be a maximum of 10000 files in a vector + store. + """ + + metadata: object + """Set of 16 key-value pairs that can be attached to a vector store. + + This can be useful for storing additional information about the vector store in + a structured format. Keys can be a maximum of 64 characters long and values can + be a maxium of 512 characters long. + """ + + +class ThreadToolResourcesFileSearch(TypedDict, total=False): + vector_store_ids: List[str] + """ + The + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this thread. There can be a maximum of 1 vector store attached to + the thread. + """ + + vector_stores: Iterable[ThreadToolResourcesFileSearchVectorStore] + """ + A helper to create a + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + with file_ids and attach it to this thread. There can be a maximum of 1 vector + store attached to the thread. 
+ """ + + +class ThreadToolResources(TypedDict, total=False): + code_interpreter: ThreadToolResourcesCodeInterpreter + + file_search: ThreadToolResourcesFileSearch + + +class Thread(TypedDict, total=False): + messages: Iterable[ThreadMessage] + """ + A list of [messages](https://platform.openai.com/docs/api-reference/messages) to + start the thread with. + """ + + metadata: Optional[object] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + tool_resources: Optional[ThreadToolResources] + """ + A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ + + +class ToolResourcesCodeInterpreter(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + """ + + +class ToolResourcesFileSearch(TypedDict, total=False): + vector_store_ids: List[str] + """ + The ID of the + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this assistant. There can be a maximum of 1 vector store attached to + the assistant. + """ + + +class ToolResources(TypedDict, total=False): + code_interpreter: ToolResourcesCodeInterpreter + + file_search: ToolResourcesFileSearch + + +Tool = Union[CodeInterpreterToolParam, FileSearchToolParam, FunctionToolParam] + + +class TruncationStrategy(TypedDict, total=False): + type: Required[Literal["auto", "last_messages"]] + """The truncation strategy to use for the thread. + + The default is `auto`. If set to `last_messages`, the thread will be truncated + to the n most recent messages in the thread. When set to `auto`, messages in the + middle of the thread will be dropped to fit the context length of the model, + `max_prompt_tokens`. + """ + + last_messages: Optional[int] + """ + The number of most recent messages from the thread when constructing the context + for the run. + """ + + +class ThreadCreateAndRunParamsNonStreaming(ThreadCreateAndRunParamsBase): + stream: Optional[Literal[False]] + """ + If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + """ + + +class ThreadCreateAndRunParamsStreaming(ThreadCreateAndRunParamsBase): + stream: Required[Literal[True]] + """ + If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + """ + + +ThreadCreateAndRunParams = Union[ + ThreadCreateAndRunParamsNonStreaming, ThreadCreateAndRunParamsStreaming +] diff --git a/portkey_ai/_vendor/openai/types/beta/thread_create_params.py b/portkey_ai/_vendor/openai/types/beta/thread_create_params.py new file mode 100644 index 00000000..ab2df21e --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/thread_create_params.py @@ -0,0 +1,130 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +from .file_search_tool_param import FileSearchToolParam +from .code_interpreter_tool_param import CodeInterpreterToolParam + +__all__ = [ + "ThreadCreateParams", + "Message", + "MessageAttachment", + "MessageAttachmentTool", + "ToolResources", + "ToolResourcesCodeInterpreter", + "ToolResourcesFileSearch", + "ToolResourcesFileSearchVectorStore", +] + + +class ThreadCreateParams(TypedDict, total=False): + messages: Iterable[Message] + """ + A list of [messages](https://platform.openai.com/docs/api-reference/messages) to + start the thread with. + """ + + metadata: Optional[object] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + tool_resources: Optional[ToolResources] + """ + A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ + + +MessageAttachmentTool = Union[CodeInterpreterToolParam, FileSearchToolParam] + + +class MessageAttachment(TypedDict, total=False): + file_id: str + """The ID of the file to attach to the message.""" + + tools: Iterable[MessageAttachmentTool] + """The tools to add this file to.""" + + +class Message(TypedDict, total=False): + content: Required[str] + """The content of the message.""" + + role: Required[Literal["user", "assistant"]] + """The role of the entity that is creating the message. Allowed values include: + + - `user`: Indicates the message is sent by an actual user and should be used in + most cases to represent user-generated messages. + - `assistant`: Indicates the message is generated by the assistant. Use this + value to insert messages from the assistant into the conversation. + """ + + attachments: Optional[Iterable[MessageAttachment]] + """A list of files attached to the message, and the tools they should be added to.""" + + metadata: Optional[object] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + +class ToolResourcesCodeInterpreter(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + """ + + +class ToolResourcesFileSearchVectorStore(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to + add to the vector store. There can be a maximum of 10000 files in a vector + store. + """ + + metadata: object + """Set of 16 key-value pairs that can be attached to a vector store. + + This can be useful for storing additional information about the vector store in + a structured format. Keys can be a maximum of 64 characters long and values can + be a maxium of 512 characters long. 
+ """ + + +class ToolResourcesFileSearch(TypedDict, total=False): + vector_store_ids: List[str] + """ + The + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this thread. There can be a maximum of 1 vector store attached to + the thread. + """ + + vector_stores: Iterable[ToolResourcesFileSearchVectorStore] + """ + A helper to create a + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + with file_ids and attach it to this thread. There can be a maximum of 1 vector + store attached to the thread. + """ + + +class ToolResources(TypedDict, total=False): + code_interpreter: ToolResourcesCodeInterpreter + + file_search: ToolResourcesFileSearch diff --git a/portkey_ai/_vendor/openai/types/beta/thread_deleted.py b/portkey_ai/_vendor/openai/types/beta/thread_deleted.py new file mode 100644 index 00000000..d3856263 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/thread_deleted.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ThreadDeleted"] + + +class ThreadDeleted(BaseModel): + id: str + + deleted: bool + + object: Literal["thread.deleted"] diff --git a/portkey_ai/_vendor/openai/types/beta/thread_update_params.py b/portkey_ai/_vendor/openai/types/beta/thread_update_params.py new file mode 100644 index 00000000..2203fb26 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/thread_update_params.py @@ -0,0 +1,56 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Optional +from typing_extensions import TypedDict + +__all__ = [ + "ThreadUpdateParams", + "ToolResources", + "ToolResourcesCodeInterpreter", + "ToolResourcesFileSearch", +] + + +class ThreadUpdateParams(TypedDict, total=False): + metadata: Optional[object] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + tool_resources: Optional[ToolResources] + """ + A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ + + +class ToolResourcesCodeInterpreter(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + """ + + +class ToolResourcesFileSearch(TypedDict, total=False): + vector_store_ids: List[str] + """ + The + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this thread. There can be a maximum of 1 vector store attached to + the thread. 
+ """ + + +class ToolResources(TypedDict, total=False): + code_interpreter: ToolResourcesCodeInterpreter + + file_search: ToolResourcesFileSearch diff --git a/portkey_ai/_vendor/openai/types/beta/threads/__init__.py b/portkey_ai/_vendor/openai/types/beta/threads/__init__.py new file mode 100644 index 00000000..f3ffd2bc --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/__init__.py @@ -0,0 +1,42 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .run import Run as Run +from .text import Text as Text +from .message import Message as Message +from .annotation import Annotation as Annotation +from .image_file import ImageFile as ImageFile +from .run_status import RunStatus as RunStatus +from .text_delta import TextDelta as TextDelta +from .message_delta import MessageDelta as MessageDelta +from .message_content import MessageContent as MessageContent +from .message_deleted import MessageDeleted as MessageDeleted +from .run_list_params import RunListParams as RunListParams +from .annotation_delta import AnnotationDelta as AnnotationDelta +from .image_file_delta import ImageFileDelta as ImageFileDelta +from .text_delta_block import TextDeltaBlock as TextDeltaBlock +from .run_create_params import RunCreateParams as RunCreateParams +from .run_update_params import RunUpdateParams as RunUpdateParams +from .text_content_block import TextContentBlock as TextContentBlock +from .message_delta_event import MessageDeltaEvent as MessageDeltaEvent +from .message_list_params import MessageListParams as MessageListParams +from .file_path_annotation import FilePathAnnotation as FilePathAnnotation +from .message_content_delta import MessageContentDelta as MessageContentDelta +from .message_create_params import MessageCreateParams as MessageCreateParams +from .message_update_params import MessageUpdateParams as MessageUpdateParams +from .image_file_delta_block import ImageFileDeltaBlock as ImageFileDeltaBlock +from .file_citation_annotation import FileCitationAnnotation as FileCitationAnnotation +from .image_file_content_block import ImageFileContentBlock as ImageFileContentBlock +from .file_path_delta_annotation import ( + FilePathDeltaAnnotation as FilePathDeltaAnnotation, +) +from .file_citation_delta_annotation import ( + FileCitationDeltaAnnotation as FileCitationDeltaAnnotation, +) +from .run_submit_tool_outputs_params import ( + RunSubmitToolOutputsParams as RunSubmitToolOutputsParams, +) +from .required_action_function_tool_call import ( + RequiredActionFunctionToolCall as RequiredActionFunctionToolCall, +) diff --git a/portkey_ai/_vendor/openai/types/beta/threads/annotation.py b/portkey_ai/_vendor/openai/types/beta/threads/annotation.py new file mode 100644 index 00000000..180418a7 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/annotation.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Union +from typing_extensions import Annotated + +from ...._utils import PropertyInfo +from .file_path_annotation import FilePathAnnotation +from .file_citation_annotation import FileCitationAnnotation + +__all__ = ["Annotation"] + +Annotation = Annotated[ + Union[FileCitationAnnotation, FilePathAnnotation], + PropertyInfo(discriminator="type"), +] diff --git a/portkey_ai/_vendor/openai/types/beta/threads/annotation_delta.py b/portkey_ai/_vendor/openai/types/beta/threads/annotation_delta.py new file mode 100644 index 00000000..4b10acc3 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/annotation_delta.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated + +from ...._utils import PropertyInfo +from .file_path_delta_annotation import FilePathDeltaAnnotation +from .file_citation_delta_annotation import FileCitationDeltaAnnotation + +__all__ = ["AnnotationDelta"] + +AnnotationDelta = Annotated[ + Union[FileCitationDeltaAnnotation, FilePathDeltaAnnotation], + PropertyInfo(discriminator="type"), +] diff --git a/portkey_ai/_vendor/openai/types/beta/threads/file_citation_annotation.py b/portkey_ai/_vendor/openai/types/beta/threads/file_citation_annotation.py new file mode 100644 index 00000000..68571cd4 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/file_citation_annotation.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["FileCitationAnnotation", "FileCitation"] + + +class FileCitation(BaseModel): + file_id: str + """The ID of the specific File the citation is from.""" + + quote: str + """The specific quote in the file.""" + + +class FileCitationAnnotation(BaseModel): + end_index: int + + file_citation: FileCitation + + start_index: int + + text: str + """The text in the message content that needs to be replaced.""" + + type: Literal["file_citation"] + """Always `file_citation`.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/file_citation_delta_annotation.py b/portkey_ai/_vendor/openai/types/beta/threads/file_citation_delta_annotation.py new file mode 100644 index 00000000..b40c0d12 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/file_citation_delta_annotation.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
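`Annotation` above is a discriminated union: `PropertyInfo(discriminator="type")` lets the pydantic-based model layer choose between `FileCitationAnnotation` and `FilePathAnnotation` from the `type` field, so consumers can branch on the concrete class. A sketch of such a consumer (the helper name and output format are illustrative, not part of the vendored code):

    from portkey_ai._vendor.openai.types.beta.threads.annotation import Annotation
    from portkey_ai._vendor.openai.types.beta.threads.file_citation_annotation import (
        FileCitationAnnotation,
    )

    def describe(annotation: Annotation) -> str:
        if isinstance(annotation, FileCitationAnnotation):
            citation = annotation.file_citation
            return f"cites file {citation.file_id}: {citation.quote!r}"
        # The only other member of the union is FilePathAnnotation.
        return f"links to generated file {annotation.file_path.file_id}"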
+ +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["FileCitationDeltaAnnotation", "FileCitation"] + + +class FileCitation(BaseModel): + file_id: Optional[str] = None + """The ID of the specific File the citation is from.""" + + quote: Optional[str] = None + """The specific quote in the file.""" + + +class FileCitationDeltaAnnotation(BaseModel): + index: int + """The index of the annotation in the text content part.""" + + type: Literal["file_citation"] + """Always `file_citation`.""" + + end_index: Optional[int] = None + + file_citation: Optional[FileCitation] = None + + start_index: Optional[int] = None + + text: Optional[str] = None + """The text in the message content that needs to be replaced.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/file_path_annotation.py b/portkey_ai/_vendor/openai/types/beta/threads/file_path_annotation.py new file mode 100644 index 00000000..9812737e --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/file_path_annotation.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["FilePathAnnotation", "FilePath"] + + +class FilePath(BaseModel): + file_id: str + """The ID of the file that was generated.""" + + +class FilePathAnnotation(BaseModel): + end_index: int + + file_path: FilePath + + start_index: int + + text: str + """The text in the message content that needs to be replaced.""" + + type: Literal["file_path"] + """Always `file_path`.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/file_path_delta_annotation.py b/portkey_ai/_vendor/openai/types/beta/threads/file_path_delta_annotation.py new file mode 100644 index 00000000..0cbb445e --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/file_path_delta_annotation.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["FilePathDeltaAnnotation", "FilePath"] + + +class FilePath(BaseModel): + file_id: Optional[str] = None + """The ID of the file that was generated.""" + + +class FilePathDeltaAnnotation(BaseModel): + index: int + """The index of the annotation in the text content part.""" + + type: Literal["file_path"] + """Always `file_path`.""" + + end_index: Optional[int] = None + + file_path: Optional[FilePath] = None + + start_index: Optional[int] = None + + text: Optional[str] = None + """The text in the message content that needs to be replaced.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/image_file.py b/portkey_ai/_vendor/openai/types/beta/threads/image_file.py new file mode 100644 index 00000000..a0b07666 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/image_file.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + +from ...._models import BaseModel + +__all__ = ["ImageFile"] + + +class ImageFile(BaseModel): + file_id: str + """ + The [File](https://platform.openai.com/docs/api-reference/files) ID of the image + in the message content. 
+ """ diff --git a/portkey_ai/_vendor/openai/types/beta/threads/image_file_content_block.py b/portkey_ai/_vendor/openai/types/beta/threads/image_file_content_block.py new file mode 100644 index 00000000..a9099990 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/image_file_content_block.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .image_file import ImageFile + +__all__ = ["ImageFileContentBlock"] + + +class ImageFileContentBlock(BaseModel): + image_file: ImageFile + + type: Literal["image_file"] + """Always `image_file`.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/image_file_delta.py b/portkey_ai/_vendor/openai/types/beta/threads/image_file_delta.py new file mode 100644 index 00000000..b0b1d32f --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/image_file_delta.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ...._models import BaseModel + +__all__ = ["ImageFileDelta"] + + +class ImageFileDelta(BaseModel): + file_id: Optional[str] = None + """ + The [File](https://platform.openai.com/docs/api-reference/files) ID of the image + in the message content. + """ diff --git a/portkey_ai/_vendor/openai/types/beta/threads/image_file_delta_block.py b/portkey_ai/_vendor/openai/types/beta/threads/image_file_delta_block.py new file mode 100644 index 00000000..0a5a2e8a --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/image_file_delta_block.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .image_file_delta import ImageFileDelta + +__all__ = ["ImageFileDeltaBlock"] + + +class ImageFileDeltaBlock(BaseModel): + index: int + """The index of the content part in the message.""" + + type: Literal["image_file"] + """Always `image_file`.""" + + image_file: Optional[ImageFileDelta] = None diff --git a/portkey_ai/_vendor/openai/types/beta/threads/message.py b/portkey_ai/_vendor/openai/types/beta/threads/message.py new file mode 100644 index 00000000..0a5d0eeb --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/message.py @@ -0,0 +1,91 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import List, Union, Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .message_content import MessageContent +from ..file_search_tool import FileSearchTool +from ..code_interpreter_tool import CodeInterpreterTool + +__all__ = ["Message", "Attachment", "AttachmentTool", "IncompleteDetails"] + +AttachmentTool = Union[CodeInterpreterTool, FileSearchTool] + + +class Attachment(BaseModel): + file_id: Optional[str] = None + """The ID of the file to attach to the message.""" + + tools: Optional[List[AttachmentTool]] = None + """The tools to add this file to.""" + + +class IncompleteDetails(BaseModel): + reason: Literal[ + "content_filter", "max_tokens", "run_cancelled", "run_expired", "run_failed" + ] + """The reason the message is incomplete.""" + + +class Message(BaseModel): + id: str + """The identifier, which can be referenced in API endpoints.""" + + assistant_id: Optional[str] = None + """ + If applicable, the ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) that + authored this message. + """ + + attachments: Optional[List[Attachment]] = None + """A list of files attached to the message, and the tools they were added to.""" + + completed_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the message was completed.""" + + content: List[MessageContent] + """The content of the message in array of text and/or images.""" + + created_at: int + """The Unix timestamp (in seconds) for when the message was created.""" + + incomplete_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the message was marked as incomplete.""" + + incomplete_details: Optional[IncompleteDetails] = None + """On an incomplete message, details about why the message is incomplete.""" + + metadata: Optional[object] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + object: Literal["thread.message"] + """The object type, which is always `thread.message`.""" + + role: Literal["user", "assistant"] + """The entity that produced the message. One of `user` or `assistant`.""" + + run_id: Optional[str] = None + """ + The ID of the [run](https://platform.openai.com/docs/api-reference/runs) + associated with the creation of this message. Value is `null` when messages are + created manually using the create message or create thread endpoints. + """ + + status: Literal["in_progress", "incomplete", "completed"] + """ + The status of the message, which can be either `in_progress`, `incomplete`, or + `completed`. + """ + + thread_id: str + """ + The [thread](https://platform.openai.com/docs/api-reference/threads) ID that + this message belongs to. + """ diff --git a/portkey_ai/_vendor/openai/types/beta/threads/message_content.py b/portkey_ai/_vendor/openai/types/beta/threads/message_content.py new file mode 100644 index 00000000..87e891e3 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/message_content.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
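The `Message` model above exposes `status` and `incomplete_details`, which is what callers need to decide whether a message is safe to render. A hedged sketch of that check (the helper is illustrative, not part of the SDK):

    from portkey_ai._vendor.openai.types.beta.threads import Message

    def is_usable(message: Message) -> bool:
        # Completed messages can be rendered; incomplete ones carry a reason code.
        if message.status == "completed":
            return True
        if message.incomplete_details is not None:
            print(f"message {message.id} incomplete: {message.incomplete_details.reason}")
        return False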
+ +from typing import Union +from typing_extensions import Annotated + +from ...._utils import PropertyInfo +from .text_content_block import TextContentBlock +from .image_file_content_block import ImageFileContentBlock + +__all__ = ["MessageContent"] + +MessageContent = Annotated[ + Union[ImageFileContentBlock, TextContentBlock], PropertyInfo(discriminator="type") +] diff --git a/portkey_ai/_vendor/openai/types/beta/threads/message_content_delta.py b/portkey_ai/_vendor/openai/types/beta/threads/message_content_delta.py new file mode 100644 index 00000000..7cfd7d9d --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/message_content_delta.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated + +from ...._utils import PropertyInfo +from .text_delta_block import TextDeltaBlock +from .image_file_delta_block import ImageFileDeltaBlock + +__all__ = ["MessageContentDelta"] + +MessageContentDelta = Annotated[ + Union[ImageFileDeltaBlock, TextDeltaBlock], PropertyInfo(discriminator="type") +] diff --git a/portkey_ai/_vendor/openai/types/beta/threads/message_create_params.py b/portkey_ai/_vendor/openai/types/beta/threads/message_create_params.py new file mode 100644 index 00000000..5cead598 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/message_create_params.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +from ..file_search_tool_param import FileSearchToolParam +from ..code_interpreter_tool_param import CodeInterpreterToolParam + +__all__ = ["MessageCreateParams", "Attachment", "AttachmentTool"] + + +class MessageCreateParams(TypedDict, total=False): + content: Required[str] + """The content of the message.""" + + role: Required[Literal["user", "assistant"]] + """The role of the entity that is creating the message. Allowed values include: + + - `user`: Indicates the message is sent by an actual user and should be used in + most cases to represent user-generated messages. + - `assistant`: Indicates the message is generated by the assistant. Use this + value to insert messages from the assistant into the conversation. + """ + + attachments: Optional[Iterable[Attachment]] + """A list of files attached to the message, and the tools they should be added to.""" + + metadata: Optional[object] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + +AttachmentTool = Union[CodeInterpreterToolParam, FileSearchToolParam] + + +class Attachment(TypedDict, total=False): + file_id: str + """The ID of the file to attach to the message.""" + + tools: Iterable[AttachmentTool] + """The tools to add this file to.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/message_deleted.py b/portkey_ai/_vendor/openai/types/beta/threads/message_deleted.py new file mode 100644 index 00000000..48210777 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/message_deleted.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
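`MessageCreateParams` above is again a `TypedDict`, with `content` and `role` required and attachments routed to specific tools. A sketch of a payload matching that shape (the file ID is a placeholder, and the `{"type": "file_search"}` tool param is assumed from the shape of `FileSearchToolParam`):

    from portkey_ai._vendor.openai.types.beta.threads.message_create_params import (
        MessageCreateParams,
    )

    params: MessageCreateParams = {
        "role": "user",
        "content": "Compare this file against the earlier draft.",
        "attachments": [
            {
                "file_id": "file-placeholder",  # hypothetical ID
                "tools": [{"type": "file_search"}],
            }
        ],
    }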
+ +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["MessageDeleted"] + + +class MessageDeleted(BaseModel): + id: str + + deleted: bool + + object: Literal["thread.message.deleted"] diff --git a/portkey_ai/_vendor/openai/types/beta/threads/message_delta.py b/portkey_ai/_vendor/openai/types/beta/threads/message_delta.py new file mode 100644 index 00000000..ecd0dfe3 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/message_delta.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .message_content_delta import MessageContentDelta + +__all__ = ["MessageDelta"] + + +class MessageDelta(BaseModel): + content: Optional[List[MessageContentDelta]] = None + """The content of the message in array of text and/or images.""" + + role: Optional[Literal["user", "assistant"]] = None + """The entity that produced the message. One of `user` or `assistant`.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/message_delta_event.py b/portkey_ai/_vendor/openai/types/beta/threads/message_delta_event.py new file mode 100644 index 00000000..3811cef6 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/message_delta_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .message_delta import MessageDelta + +__all__ = ["MessageDeltaEvent"] + + +class MessageDeltaEvent(BaseModel): + id: str + """The identifier of the message, which can be referenced in API endpoints.""" + + delta: MessageDelta + """The delta containing the fields that have changed on the Message.""" + + object: Literal["thread.message.delta"] + """The object type, which is always `thread.message.delta`.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/message_list_params.py b/portkey_ai/_vendor/openai/types/beta/threads/message_list_params.py new file mode 100644 index 00000000..18c2442f --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/message_list_params.py @@ -0,0 +1,42 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["MessageListParams"] + + +class MessageListParams(TypedDict, total=False): + after: str + """A cursor for use in pagination. + + `after` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include after=obj_foo in order to fetch the next page of the + list. + """ + + before: str + """A cursor for use in pagination. + + `before` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include before=obj_foo in order to fetch the previous page + of the list. + """ + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 100, and the default is 20. + """ + + order: Literal["asc", "desc"] + """Sort order by the `created_at` timestamp of the objects. + + `asc` for ascending order and `desc` for descending order. 
+ """ + + run_id: str + """Filter messages by the run ID that generated them.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/message_update_params.py b/portkey_ai/_vendor/openai/types/beta/threads/message_update_params.py new file mode 100644 index 00000000..7000f331 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/message_update_params.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Required, TypedDict + +__all__ = ["MessageUpdateParams"] + + +class MessageUpdateParams(TypedDict, total=False): + thread_id: Required[str] + + metadata: Optional[object] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ diff --git a/portkey_ai/_vendor/openai/types/beta/threads/required_action_function_tool_call.py b/portkey_ai/_vendor/openai/types/beta/threads/required_action_function_tool_call.py new file mode 100644 index 00000000..a24dfd06 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/required_action_function_tool_call.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["RequiredActionFunctionToolCall", "Function"] + + +class Function(BaseModel): + arguments: str + """The arguments that the model expects you to pass to the function.""" + + name: str + """The name of the function.""" + + +class RequiredActionFunctionToolCall(BaseModel): + id: str + """The ID of the tool call. + + This ID must be referenced when you submit the tool outputs in using the + [Submit tool outputs to run](https://platform.openai.com/docs/api-reference/runs/submitToolOutputs) + endpoint. + """ + + function: Function + """The function definition.""" + + type: Literal["function"] + """The type of tool call the output is required for. + + For now, this is always `function`. + """ diff --git a/portkey_ai/_vendor/openai/types/beta/threads/run.py b/portkey_ai/_vendor/openai/types/beta/threads/run.py new file mode 100644 index 00000000..6c118f27 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/run.py @@ -0,0 +1,230 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .run_status import RunStatus +from ..assistant_tool import AssistantTool +from ..assistant_tool_choice_option import AssistantToolChoiceOption +from ..assistant_response_format_option import AssistantResponseFormatOption +from .required_action_function_tool_call import RequiredActionFunctionToolCall + +__all__ = [ + "Run", + "IncompleteDetails", + "LastError", + "RequiredAction", + "RequiredActionSubmitToolOutputs", + "TruncationStrategy", + "Usage", +] + + +class IncompleteDetails(BaseModel): + reason: Optional[Literal["max_completion_tokens", "max_prompt_tokens"]] = None + """The reason why the run is incomplete. + + This will point to which specific token limit was reached over the course of the + run. 
+ """ + + +class LastError(BaseModel): + code: Literal["server_error", "rate_limit_exceeded", "invalid_prompt"] + """One of `server_error`, `rate_limit_exceeded`, or `invalid_prompt`.""" + + message: str + """A human-readable description of the error.""" + + +class RequiredActionSubmitToolOutputs(BaseModel): + tool_calls: List[RequiredActionFunctionToolCall] + """A list of the relevant tool calls.""" + + +class RequiredAction(BaseModel): + submit_tool_outputs: RequiredActionSubmitToolOutputs + """Details on the tool outputs needed for this run to continue.""" + + type: Literal["submit_tool_outputs"] + """For now, this is always `submit_tool_outputs`.""" + + +class TruncationStrategy(BaseModel): + type: Literal["auto", "last_messages"] + """The truncation strategy to use for the thread. + + The default is `auto`. If set to `last_messages`, the thread will be truncated + to the n most recent messages in the thread. When set to `auto`, messages in the + middle of the thread will be dropped to fit the context length of the model, + `max_prompt_tokens`. + """ + + last_messages: Optional[int] = None + """ + The number of most recent messages from the thread when constructing the context + for the run. + """ + + +class Usage(BaseModel): + completion_tokens: int + """Number of completion tokens used over the course of the run.""" + + prompt_tokens: int + """Number of prompt tokens used over the course of the run.""" + + total_tokens: int + """Total number of tokens used (prompt + completion).""" + + +class Run(BaseModel): + id: str + """The identifier, which can be referenced in API endpoints.""" + + assistant_id: str + """ + The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) used for + execution of this run. + """ + + cancelled_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the run was cancelled.""" + + completed_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the run was completed.""" + + created_at: int + """The Unix timestamp (in seconds) for when the run was created.""" + + expires_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the run will expire.""" + + failed_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the run failed.""" + + incomplete_details: Optional[IncompleteDetails] = None + """Details on why the run is incomplete. + + Will be `null` if the run is not incomplete. + """ + + instructions: str + """ + The instructions that the + [assistant](https://platform.openai.com/docs/api-reference/assistants) used for + this run. + """ + + last_error: Optional[LastError] = None + """The last error associated with this run. Will be `null` if there are no errors.""" + + max_completion_tokens: Optional[int] = None + """ + The maximum number of completion tokens specified to have been used over the + course of the run. + """ + + max_prompt_tokens: Optional[int] = None + """ + The maximum number of prompt tokens specified to have been used over the course + of the run. + """ + + metadata: Optional[object] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + model: str + """ + The model that the + [assistant](https://platform.openai.com/docs/api-reference/assistants) used for + this run. 
+ """ + + object: Literal["thread.run"] + """The object type, which is always `thread.run`.""" + + required_action: Optional[RequiredAction] = None + """Details on the action required to continue the run. + + Will be `null` if no action is required. + """ + + response_format: Optional[AssistantResponseFormatOption] = None + """Specifies the format that the model must output. + + Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + """ + + started_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the run was started.""" + + status: RunStatus + """ + The status of the run, which can be either `queued`, `in_progress`, + `requires_action`, `cancelling`, `cancelled`, `failed`, `completed`, or + `expired`. + """ + + thread_id: str + """ + The ID of the [thread](https://platform.openai.com/docs/api-reference/threads) + that was executed on as a part of this run. + """ + + tool_choice: Optional[AssistantToolChoiceOption] = None + """ + Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + """ + + tools: List[AssistantTool] + """ + The list of tools that the + [assistant](https://platform.openai.com/docs/api-reference/assistants) used for + this run. + """ + + truncation_strategy: Optional[TruncationStrategy] = None + """Controls for how a thread will be truncated prior to the run. + + Use this to control the intial context window of the run. + """ + + usage: Optional[Usage] = None + """Usage statistics related to the run. + + This value will be `null` if the run is not in a terminal state (i.e. + `in_progress`, `queued`, etc.). + """ + + temperature: Optional[float] = None + """The sampling temperature used for this run. If not set, defaults to 1.""" + + top_p: Optional[float] = None + """The nucleus sampling value used for this run. If not set, defaults to 1.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/run_create_params.py b/portkey_ai/_vendor/openai/types/beta/threads/run_create_params.py new file mode 100644 index 00000000..2e4823ba --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/run_create_params.py @@ -0,0 +1,237 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Union, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +from ..assistant_tool_param import AssistantToolParam +from ..file_search_tool_param import FileSearchToolParam +from ..code_interpreter_tool_param import CodeInterpreterToolParam +from ..assistant_tool_choice_option_param import AssistantToolChoiceOptionParam +from ..assistant_response_format_option_param import AssistantResponseFormatOptionParam + +__all__ = [ + "RunCreateParamsBase", + "AdditionalMessage", + "AdditionalMessageAttachment", + "AdditionalMessageAttachmentTool", + "TruncationStrategy", + "RunCreateParamsNonStreaming", + "RunCreateParamsStreaming", +] + + +class RunCreateParamsBase(TypedDict, total=False): + assistant_id: Required[str] + """ + The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + """ + + additional_instructions: Optional[str] + """Appends additional instructions at the end of the instructions for the run. + + This is useful for modifying the behavior on a per-run basis without overriding + other instructions. + """ + + additional_messages: Optional[Iterable[AdditionalMessage]] + """Adds additional messages to the thread before creating the run.""" + + instructions: Optional[str] + """ + Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + """ + + max_completion_tokens: Optional[int] + """ + The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + """ + + max_prompt_tokens: Optional[int] + """The maximum number of prompt tokens that may be used over the course of the run. + + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + """ + + metadata: Optional[object] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + model: Union[ + str, + Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + """ + The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + """ + + response_format: Optional[AssistantResponseFormatOptionParam] + """Specifies the format that the model must output. 
+ + Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + """ + + temperature: Optional[float] + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. + """ + + tool_choice: Optional[AssistantToolChoiceOptionParam] + """ + Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + """ + + tools: Optional[Iterable[AssistantToolParam]] + """Override the tools the assistant can use for this run. + + This is useful for modifying the behavior on a per-run basis. + """ + + top_p: Optional[float] + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + """ + + truncation_strategy: Optional[TruncationStrategy] + """Controls for how a thread will be truncated prior to the run. + + Use this to control the intial context window of the run. + """ + + +AdditionalMessageAttachmentTool = Union[CodeInterpreterToolParam, FileSearchToolParam] + + +class AdditionalMessageAttachment(TypedDict, total=False): + file_id: str + """The ID of the file to attach to the message.""" + + tools: Iterable[AdditionalMessageAttachmentTool] + """The tools to add this file to.""" + + +class AdditionalMessage(TypedDict, total=False): + content: Required[str] + """The content of the message.""" + + role: Required[Literal["user", "assistant"]] + """The role of the entity that is creating the message. Allowed values include: + + - `user`: Indicates the message is sent by an actual user and should be used in + most cases to represent user-generated messages. + - `assistant`: Indicates the message is generated by the assistant. Use this + value to insert messages from the assistant into the conversation. + """ + + attachments: Optional[Iterable[AdditionalMessageAttachment]] + """A list of files attached to the message, and the tools they should be added to.""" + + metadata: Optional[object] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. 
Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + +class TruncationStrategy(TypedDict, total=False): + type: Required[Literal["auto", "last_messages"]] + """The truncation strategy to use for the thread. + + The default is `auto`. If set to `last_messages`, the thread will be truncated + to the n most recent messages in the thread. When set to `auto`, messages in the + middle of the thread will be dropped to fit the context length of the model, + `max_prompt_tokens`. + """ + + last_messages: Optional[int] + """ + The number of most recent messages from the thread when constructing the context + for the run. + """ + + +class RunCreateParamsNonStreaming(RunCreateParamsBase): + stream: Optional[Literal[False]] + """ + If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + """ + + +class RunCreateParamsStreaming(RunCreateParamsBase): + stream: Required[Literal[True]] + """ + If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + """ + + +RunCreateParams = Union[RunCreateParamsNonStreaming, RunCreateParamsStreaming] diff --git a/portkey_ai/_vendor/openai/types/beta/threads/run_list_params.py b/portkey_ai/_vendor/openai/types/beta/threads/run_list_params.py new file mode 100644 index 00000000..1e32bca4 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/run_list_params.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["RunListParams"] + + +class RunListParams(TypedDict, total=False): + after: str + """A cursor for use in pagination. + + `after` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include after=obj_foo in order to fetch the next page of the + list. + """ + + before: str + """A cursor for use in pagination. + + `before` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include before=obj_foo in order to fetch the previous page + of the list. + """ + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 100, and the default is 20. + """ + + order: Literal["asc", "desc"] + """Sort order by the `created_at` timestamp of the objects. + + `asc` for ascending order and `desc` for descending order. + """ diff --git a/portkey_ai/_vendor/openai/types/beta/threads/run_status.py b/portkey_ai/_vendor/openai/types/beta/threads/run_status.py new file mode 100644 index 00000000..c9d9da79 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/run_status.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
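`RunCreateParams` resolves to one of the two `TypedDict` variants above, so a request body is a plain dictionary. A sketch of a non-streaming payload using only fields defined in this file (the assistant ID is a placeholder):

    from portkey_ai._vendor.openai.types.beta.threads.run_create_params import (
        RunCreateParamsNonStreaming,
    )

    params: RunCreateParamsNonStreaming = {
        "assistant_id": "asst-placeholder",  # hypothetical ID
        "model": "gpt-4-turbo",              # overrides the assistant's configured model
        "max_prompt_tokens": 4000,
        "truncation_strategy": {"type": "auto"},
        "additional_messages": [
            {"role": "user", "content": "Focus on the executive summary."}
        ],
        "stream": False,
    }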
+ +from typing_extensions import Literal + +__all__ = ["RunStatus"] + +RunStatus = Literal[ + "queued", + "in_progress", + "requires_action", + "cancelling", + "cancelled", + "failed", + "completed", + "expired", +] diff --git a/portkey_ai/_vendor/openai/types/beta/threads/run_submit_tool_outputs_params.py b/portkey_ai/_vendor/openai/types/beta/threads/run_submit_tool_outputs_params.py new file mode 100644 index 00000000..1958e46c --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/run_submit_tool_outputs_params.py @@ -0,0 +1,54 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = [ + "RunSubmitToolOutputsParamsBase", + "ToolOutput", + "RunSubmitToolOutputsParamsNonStreaming", + "RunSubmitToolOutputsParamsStreaming", +] + + +class RunSubmitToolOutputsParamsBase(TypedDict, total=False): + thread_id: Required[str] + + tool_outputs: Required[Iterable[ToolOutput]] + """A list of tools for which the outputs are being submitted.""" + + +class ToolOutput(TypedDict, total=False): + output: str + """The output of the tool call to be submitted to continue the run.""" + + tool_call_id: str + """ + The ID of the tool call in the `required_action` object within the run object + the output is being submitted for. + """ + + +class RunSubmitToolOutputsParamsNonStreaming(RunSubmitToolOutputsParamsBase): + stream: Optional[Literal[False]] + """ + If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + """ + + +class RunSubmitToolOutputsParamsStreaming(RunSubmitToolOutputsParamsBase): + stream: Required[Literal[True]] + """ + If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + """ + + +RunSubmitToolOutputsParams = Union[ + RunSubmitToolOutputsParamsNonStreaming, RunSubmitToolOutputsParamsStreaming +] diff --git a/portkey_ai/_vendor/openai/types/beta/threads/run_update_params.py b/portkey_ai/_vendor/openai/types/beta/threads/run_update_params.py new file mode 100644 index 00000000..e595eac8 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/run_update_params.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Required, TypedDict + +__all__ = ["RunUpdateParams"] + + +class RunUpdateParams(TypedDict, total=False): + thread_id: Required[str] + + metadata: Optional[object] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ diff --git a/portkey_ai/_vendor/openai/types/beta/threads/runs/__init__.py b/portkey_ai/_vendor/openai/types/beta/threads/runs/__init__.py new file mode 100644 index 00000000..4d04f030 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/runs/__init__.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
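`RunSubmitToolOutputsParams` pairs naturally with the `requires_action` branch sketched earlier: each output echoes back the `tool_call_id` it answers. A hedged example of the non-streaming shape (IDs are placeholders, and the JSON string in `output` is just one way to encode a result):

    from portkey_ai._vendor.openai.types.beta.threads.run_submit_tool_outputs_params import (
        RunSubmitToolOutputsParamsNonStreaming,
    )

    params: RunSubmitToolOutputsParamsNonStreaming = {
        "thread_id": "thread-placeholder",  # hypothetical ID
        "tool_outputs": [
            {"tool_call_id": "call-placeholder", "output": '{"temperature_c": 21}'},
        ],
        "stream": False,
    }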
+ +from __future__ import annotations + +from .run_step import RunStep as RunStep +from .tool_call import ToolCall as ToolCall +from .run_step_delta import RunStepDelta as RunStepDelta +from .tool_call_delta import ToolCallDelta as ToolCallDelta +from .step_list_params import StepListParams as StepListParams +from .function_tool_call import FunctionToolCall as FunctionToolCall +from .run_step_delta_event import RunStepDeltaEvent as RunStepDeltaEvent +from .code_interpreter_logs import CodeInterpreterLogs as CodeInterpreterLogs +from .file_search_tool_call import FileSearchToolCall as FileSearchToolCall +from .tool_call_delta_object import ToolCallDeltaObject as ToolCallDeltaObject +from .tool_calls_step_details import ToolCallsStepDetails as ToolCallsStepDetails +from .function_tool_call_delta import FunctionToolCallDelta as FunctionToolCallDelta +from .code_interpreter_tool_call import ( + CodeInterpreterToolCall as CodeInterpreterToolCall, +) +from .file_search_tool_call_delta import ( + FileSearchToolCallDelta as FileSearchToolCallDelta, +) +from .run_step_delta_message_delta import ( + RunStepDeltaMessageDelta as RunStepDeltaMessageDelta, +) +from .code_interpreter_output_image import ( + CodeInterpreterOutputImage as CodeInterpreterOutputImage, +) +from .message_creation_step_details import ( + MessageCreationStepDetails as MessageCreationStepDetails, +) +from .code_interpreter_tool_call_delta import ( + CodeInterpreterToolCallDelta as CodeInterpreterToolCallDelta, +) diff --git a/portkey_ai/_vendor/openai/types/beta/threads/runs/code_interpreter_logs.py b/portkey_ai/_vendor/openai/types/beta/threads/runs/code_interpreter_logs.py new file mode 100644 index 00000000..0bf8c1da --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/runs/code_interpreter_logs.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ....._models import BaseModel + +__all__ = ["CodeInterpreterLogs"] + + +class CodeInterpreterLogs(BaseModel): + index: int + """The index of the output in the outputs array.""" + + type: Literal["logs"] + """Always `logs`.""" + + logs: Optional[str] = None + """The text output from the Code Interpreter tool call.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/runs/code_interpreter_output_image.py b/portkey_ai/_vendor/openai/types/beta/threads/runs/code_interpreter_output_image.py new file mode 100644 index 00000000..2257f37e --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/runs/code_interpreter_output_image.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ....._models import BaseModel + +__all__ = ["CodeInterpreterOutputImage", "Image"] + + +class Image(BaseModel): + file_id: Optional[str] = None + """ + The [file](https://platform.openai.com/docs/api-reference/files) ID of the + image. 
+ """ + + +class CodeInterpreterOutputImage(BaseModel): + index: int + """The index of the output in the outputs array.""" + + type: Literal["image"] + """Always `image`.""" + + image: Optional[Image] = None diff --git a/portkey_ai/_vendor/openai/types/beta/threads/runs/code_interpreter_tool_call.py b/portkey_ai/_vendor/openai/types/beta/threads/runs/code_interpreter_tool_call.py new file mode 100644 index 00000000..0502f787 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/runs/code_interpreter_tool_call.py @@ -0,0 +1,71 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union +from typing_extensions import Literal, Annotated + +from ....._utils import PropertyInfo +from ....._models import BaseModel + +__all__ = [ + "CodeInterpreterToolCall", + "CodeInterpreter", + "CodeInterpreterOutput", + "CodeInterpreterOutputLogs", + "CodeInterpreterOutputImage", + "CodeInterpreterOutputImageImage", +] + + +class CodeInterpreterOutputLogs(BaseModel): + logs: str + """The text output from the Code Interpreter tool call.""" + + type: Literal["logs"] + """Always `logs`.""" + + +class CodeInterpreterOutputImageImage(BaseModel): + file_id: str + """ + The [file](https://platform.openai.com/docs/api-reference/files) ID of the + image. + """ + + +class CodeInterpreterOutputImage(BaseModel): + image: CodeInterpreterOutputImageImage + + type: Literal["image"] + """Always `image`.""" + + +CodeInterpreterOutput = Annotated[ + Union[CodeInterpreterOutputLogs, CodeInterpreterOutputImage], + PropertyInfo(discriminator="type"), +] + + +class CodeInterpreter(BaseModel): + input: str + """The input to the Code Interpreter tool call.""" + + outputs: List[CodeInterpreterOutput] + """The outputs from the Code Interpreter tool call. + + Code Interpreter can output one or more items, including text (`logs`) or images + (`image`). Each of these are represented by a different object type. + """ + + +class CodeInterpreterToolCall(BaseModel): + id: str + """The ID of the tool call.""" + + code_interpreter: CodeInterpreter + """The Code Interpreter tool call definition.""" + + type: Literal["code_interpreter"] + """The type of tool call. + + This is always going to be `code_interpreter` for this type of tool call. + """ diff --git a/portkey_ai/_vendor/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py b/portkey_ai/_vendor/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py new file mode 100644 index 00000000..781d807f --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py @@ -0,0 +1,45 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated + +from ....._utils import PropertyInfo +from ....._models import BaseModel +from .code_interpreter_logs import CodeInterpreterLogs +from .code_interpreter_output_image import CodeInterpreterOutputImage + +__all__ = ["CodeInterpreterToolCallDelta", "CodeInterpreter", "CodeInterpreterOutput"] + +CodeInterpreterOutput = Annotated[ + Union[CodeInterpreterLogs, CodeInterpreterOutputImage], + PropertyInfo(discriminator="type"), +] + + +class CodeInterpreter(BaseModel): + input: Optional[str] = None + """The input to the Code Interpreter tool call.""" + + outputs: Optional[List[CodeInterpreterOutput]] = None + """The outputs from the Code Interpreter tool call. 
+ + Code Interpreter can output one or more items, including text (`logs`) or images + (`image`). Each of these are represented by a different object type. + """ + + +class CodeInterpreterToolCallDelta(BaseModel): + index: int + """The index of the tool call in the tool calls array.""" + + type: Literal["code_interpreter"] + """The type of tool call. + + This is always going to be `code_interpreter` for this type of tool call. + """ + + id: Optional[str] = None + """The ID of the tool call.""" + + code_interpreter: Optional[CodeInterpreter] = None + """The Code Interpreter tool call definition.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/runs/file_search_tool_call.py b/portkey_ai/_vendor/openai/types/beta/threads/runs/file_search_tool_call.py new file mode 100644 index 00000000..57c0ca9a --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/runs/file_search_tool_call.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ....._models import BaseModel + +__all__ = ["FileSearchToolCall"] + + +class FileSearchToolCall(BaseModel): + id: str + """The ID of the tool call object.""" + + file_search: object + """For now, this is always going to be an empty object.""" + + type: Literal["file_search"] + """The type of tool call. + + This is always going to be `file_search` for this type of tool call. + """ diff --git a/portkey_ai/_vendor/openai/types/beta/threads/runs/file_search_tool_call_delta.py b/portkey_ai/_vendor/openai/types/beta/threads/runs/file_search_tool_call_delta.py new file mode 100644 index 00000000..df5ac217 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/runs/file_search_tool_call_delta.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ....._models import BaseModel + +__all__ = ["FileSearchToolCallDelta"] + + +class FileSearchToolCallDelta(BaseModel): + file_search: object + """For now, this is always going to be an empty object.""" + + index: int + """The index of the tool call in the tool calls array.""" + + type: Literal["file_search"] + """The type of tool call. + + This is always going to be `file_search` for this type of tool call. + """ + + id: Optional[str] = None + """The ID of the tool call object.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/runs/function_tool_call.py b/portkey_ai/_vendor/openai/types/beta/threads/runs/function_tool_call.py new file mode 100644 index 00000000..b1d354f8 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/runs/function_tool_call.py @@ -0,0 +1,38 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ....._models import BaseModel + +__all__ = ["FunctionToolCall", "Function"] + + +class Function(BaseModel): + arguments: str + """The arguments passed to the function.""" + + name: str + """The name of the function.""" + + output: Optional[str] = None + """The output of the function. + + This will be `null` if the outputs have not been + [submitted](https://platform.openai.com/docs/api-reference/runs/submitToolOutputs) + yet. 
+ """ + + +class FunctionToolCall(BaseModel): + id: str + """The ID of the tool call object.""" + + function: Function + """The definition of the function that was called.""" + + type: Literal["function"] + """The type of tool call. + + This is always going to be `function` for this type of tool call. + """ diff --git a/portkey_ai/_vendor/openai/types/beta/threads/runs/function_tool_call_delta.py b/portkey_ai/_vendor/openai/types/beta/threads/runs/function_tool_call_delta.py new file mode 100644 index 00000000..faaf026f --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/runs/function_tool_call_delta.py @@ -0,0 +1,41 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ....._models import BaseModel + +__all__ = ["FunctionToolCallDelta", "Function"] + + +class Function(BaseModel): + arguments: Optional[str] = None + """The arguments passed to the function.""" + + name: Optional[str] = None + """The name of the function.""" + + output: Optional[str] = None + """The output of the function. + + This will be `null` if the outputs have not been + [submitted](https://platform.openai.com/docs/api-reference/runs/submitToolOutputs) + yet. + """ + + +class FunctionToolCallDelta(BaseModel): + index: int + """The index of the tool call in the tool calls array.""" + + type: Literal["function"] + """The type of tool call. + + This is always going to be `function` for this type of tool call. + """ + + id: Optional[str] = None + """The ID of the tool call object.""" + + function: Optional[Function] = None + """The definition of the function that was called.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/runs/message_creation_step_details.py b/portkey_ai/_vendor/openai/types/beta/threads/runs/message_creation_step_details.py new file mode 100644 index 00000000..73439079 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/runs/message_creation_step_details.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ....._models import BaseModel + +__all__ = ["MessageCreationStepDetails", "MessageCreation"] + + +class MessageCreation(BaseModel): + message_id: str + """The ID of the message that was created by this run step.""" + + +class MessageCreationStepDetails(BaseModel): + message_creation: MessageCreation + + type: Literal["message_creation"] + """Always `message_creation`.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/runs/run_step.py b/portkey_ai/_vendor/openai/types/beta/threads/runs/run_step.py new file mode 100644 index 00000000..f591c20d --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/runs/run_step.py @@ -0,0 +1,113 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Union, Optional +from typing_extensions import Literal, Annotated + +from ....._utils import PropertyInfo +from ....._models import BaseModel +from .tool_calls_step_details import ToolCallsStepDetails +from .message_creation_step_details import MessageCreationStepDetails + +__all__ = ["RunStep", "LastError", "StepDetails", "Usage"] + + +class LastError(BaseModel): + code: Literal["server_error", "rate_limit_exceeded"] + """One of `server_error` or `rate_limit_exceeded`.""" + + message: str + """A human-readable description of the error.""" + + +StepDetails = Annotated[ + Union[MessageCreationStepDetails, ToolCallsStepDetails], + PropertyInfo(discriminator="type"), +] + + +class Usage(BaseModel): + completion_tokens: int + """Number of completion tokens used over the course of the run step.""" + + prompt_tokens: int + """Number of prompt tokens used over the course of the run step.""" + + total_tokens: int + """Total number of tokens used (prompt + completion).""" + + +class RunStep(BaseModel): + id: str + """The identifier of the run step, which can be referenced in API endpoints.""" + + assistant_id: str + """ + The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) + associated with the run step. + """ + + cancelled_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the run step was cancelled.""" + + completed_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the run step completed.""" + + created_at: int + """The Unix timestamp (in seconds) for when the run step was created.""" + + expired_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the run step expired. + + A step is considered expired if the parent run is expired. + """ + + failed_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the run step failed.""" + + last_error: Optional[LastError] = None + """The last error associated with this run step. + + Will be `null` if there are no errors. + """ + + metadata: Optional[object] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + object: Literal["thread.run.step"] + """The object type, which is always `thread.run.step`.""" + + run_id: str + """ + The ID of the [run](https://platform.openai.com/docs/api-reference/runs) that + this run step is a part of. + """ + + status: Literal["in_progress", "cancelled", "failed", "completed", "expired"] + """ + The status of the run step, which can be either `in_progress`, `cancelled`, + `failed`, `completed`, or `expired`. + """ + + step_details: StepDetails + """The details of the run step.""" + + thread_id: str + """ + The ID of the [thread](https://platform.openai.com/docs/api-reference/threads) + that was run. + """ + + type: Literal["message_creation", "tool_calls"] + """The type of run step, which can be either `message_creation` or `tool_calls`.""" + + usage: Optional[Usage] = None + """Usage statistics related to the run step. + + This value will be `null` while the run step's status is `in_progress`. 
+ """ diff --git a/portkey_ai/_vendor/openai/types/beta/threads/runs/run_step_delta.py b/portkey_ai/_vendor/openai/types/beta/threads/runs/run_step_delta.py new file mode 100644 index 00000000..24bb5568 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/runs/run_step_delta.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union, Optional +from typing_extensions import Annotated + +from ....._utils import PropertyInfo +from ....._models import BaseModel +from .tool_call_delta_object import ToolCallDeltaObject +from .run_step_delta_message_delta import RunStepDeltaMessageDelta + +__all__ = ["RunStepDelta", "StepDetails"] + +StepDetails = Annotated[ + Union[RunStepDeltaMessageDelta, ToolCallDeltaObject], + PropertyInfo(discriminator="type"), +] + + +class RunStepDelta(BaseModel): + step_details: Optional[StepDetails] = None + """The details of the run step.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/runs/run_step_delta_event.py b/portkey_ai/_vendor/openai/types/beta/threads/runs/run_step_delta_event.py new file mode 100644 index 00000000..7f3f92aa --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/runs/run_step_delta_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ....._models import BaseModel +from .run_step_delta import RunStepDelta + +__all__ = ["RunStepDeltaEvent"] + + +class RunStepDeltaEvent(BaseModel): + id: str + """The identifier of the run step, which can be referenced in API endpoints.""" + + delta: RunStepDelta + """The delta containing the fields that have changed on the run step.""" + + object: Literal["thread.run.step.delta"] + """The object type, which is always `thread.run.step.delta`.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/runs/run_step_delta_message_delta.py b/portkey_ai/_vendor/openai/types/beta/threads/runs/run_step_delta_message_delta.py new file mode 100644 index 00000000..f58ed3d9 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/runs/run_step_delta_message_delta.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ....._models import BaseModel + +__all__ = ["RunStepDeltaMessageDelta", "MessageCreation"] + + +class MessageCreation(BaseModel): + message_id: Optional[str] = None + """The ID of the message that was created by this run step.""" + + +class RunStepDeltaMessageDelta(BaseModel): + type: Literal["message_creation"] + """Always `message_creation`.""" + + message_creation: Optional[MessageCreation] = None diff --git a/portkey_ai/_vendor/openai/types/beta/threads/runs/step_list_params.py b/portkey_ai/_vendor/openai/types/beta/threads/runs/step_list_params.py new file mode 100644 index 00000000..606d4445 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/runs/step_list_params.py @@ -0,0 +1,41 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["StepListParams"] + + +class StepListParams(TypedDict, total=False): + thread_id: Required[str] + + after: str + """A cursor for use in pagination. + + `after` is an object ID that defines your place in the list. 
For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include after=obj_foo in order to fetch the next page of the + list. + """ + + before: str + """A cursor for use in pagination. + + `before` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include before=obj_foo in order to fetch the previous page + of the list. + """ + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 100, and the default is 20. + """ + + order: Literal["asc", "desc"] + """Sort order by the `created_at` timestamp of the objects. + + `asc` for ascending order and `desc` for descending order. + """ diff --git a/portkey_ai/_vendor/openai/types/beta/threads/runs/tool_call.py b/portkey_ai/_vendor/openai/types/beta/threads/runs/tool_call.py new file mode 100644 index 00000000..330f69ee --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/runs/tool_call.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated + +from ....._utils import PropertyInfo +from .function_tool_call import FunctionToolCall +from .file_search_tool_call import FileSearchToolCall +from .code_interpreter_tool_call import CodeInterpreterToolCall + +__all__ = ["ToolCall"] + +ToolCall = Annotated[ + Union[CodeInterpreterToolCall, FileSearchToolCall, FunctionToolCall], + PropertyInfo(discriminator="type"), +] diff --git a/portkey_ai/_vendor/openai/types/beta/threads/runs/tool_call_delta.py b/portkey_ai/_vendor/openai/types/beta/threads/runs/tool_call_delta.py new file mode 100644 index 00000000..90cfe065 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/runs/tool_call_delta.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated + +from ....._utils import PropertyInfo +from .function_tool_call_delta import FunctionToolCallDelta +from .file_search_tool_call_delta import FileSearchToolCallDelta +from .code_interpreter_tool_call_delta import CodeInterpreterToolCallDelta + +__all__ = ["ToolCallDelta"] + +ToolCallDelta = Annotated[ + Union[CodeInterpreterToolCallDelta, FileSearchToolCallDelta, FunctionToolCallDelta], + PropertyInfo(discriminator="type"), +] diff --git a/portkey_ai/_vendor/openai/types/beta/threads/runs/tool_call_delta_object.py b/portkey_ai/_vendor/openai/types/beta/threads/runs/tool_call_delta_object.py new file mode 100644 index 00000000..189dce77 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/runs/tool_call_delta_object.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ....._models import BaseModel +from .tool_call_delta import ToolCallDelta + +__all__ = ["ToolCallDeltaObject"] + + +class ToolCallDeltaObject(BaseModel): + type: Literal["tool_calls"] + """Always `tool_calls`.""" + + tool_calls: Optional[List[ToolCallDelta]] = None + """An array of tool calls the run step was involved in. + + These can be associated with one of three types of tools: `code_interpreter`, + `file_search`, or `function`. 
+ """ diff --git a/portkey_ai/_vendor/openai/types/beta/threads/runs/tool_calls_step_details.py b/portkey_ai/_vendor/openai/types/beta/threads/runs/tool_calls_step_details.py new file mode 100644 index 00000000..a084d387 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/runs/tool_calls_step_details.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List +from typing_extensions import Literal + +from .tool_call import ToolCall +from ....._models import BaseModel + +__all__ = ["ToolCallsStepDetails"] + + +class ToolCallsStepDetails(BaseModel): + tool_calls: List[ToolCall] + """An array of tool calls the run step was involved in. + + These can be associated with one of three types of tools: `code_interpreter`, + `file_search`, or `function`. + """ + + type: Literal["tool_calls"] + """Always `tool_calls`.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/text.py b/portkey_ai/_vendor/openai/types/beta/threads/text.py new file mode 100644 index 00000000..853bec29 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/text.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List + +from ...._models import BaseModel +from .annotation import Annotation + +__all__ = ["Text"] + + +class Text(BaseModel): + annotations: List[Annotation] + + value: str + """The data that makes up the text.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/text_content_block.py b/portkey_ai/_vendor/openai/types/beta/threads/text_content_block.py new file mode 100644 index 00000000..3706d6b9 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/text_content_block.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .text import Text +from ...._models import BaseModel + +__all__ = ["TextContentBlock"] + + +class TextContentBlock(BaseModel): + text: Text + + type: Literal["text"] + """Always `text`.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/text_delta.py b/portkey_ai/_vendor/openai/types/beta/threads/text_delta.py new file mode 100644 index 00000000..09cd3570 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/text_delta.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional + +from ...._models import BaseModel +from .annotation_delta import AnnotationDelta + +__all__ = ["TextDelta"] + + +class TextDelta(BaseModel): + annotations: Optional[List[AnnotationDelta]] = None + + value: Optional[str] = None + """The data that makes up the text.""" diff --git a/portkey_ai/_vendor/openai/types/beta/threads/text_delta_block.py b/portkey_ai/_vendor/openai/types/beta/threads/text_delta_block.py new file mode 100644 index 00000000..586116e0 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/threads/text_delta_block.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .text_delta import TextDelta + +__all__ = ["TextDeltaBlock"] + + +class TextDeltaBlock(BaseModel): + index: int + """The index of the content part in the message.""" + + type: Literal["text"] + """Always `text`.""" + + text: Optional[TextDelta] = None diff --git a/portkey_ai/_vendor/openai/types/beta/vector_store.py b/portkey_ai/_vendor/openai/types/beta/vector_store.py new file mode 100644 index 00000000..488961b4 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/vector_store.py @@ -0,0 +1,79 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["VectorStore", "FileCounts", "ExpiresAfter"] + + +class FileCounts(BaseModel): + cancelled: int + """The number of files that were cancelled.""" + + completed: int + """The number of files that have been successfully processed.""" + + failed: int + """The number of files that have failed to process.""" + + in_progress: int + """The number of files that are currently being processed.""" + + total: int + """The total number of files.""" + + +class ExpiresAfter(BaseModel): + anchor: Literal["last_active_at"] + """Anchor timestamp after which the expiration policy applies. + + Supported anchors: `last_active_at`. + """ + + days: int + """The number of days after the anchor time that the vector store will expire.""" + + +class VectorStore(BaseModel): + id: str + """The identifier, which can be referenced in API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the vector store was created.""" + + file_counts: FileCounts + + last_active_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the vector store was last active.""" + + metadata: Optional[object] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + name: str + """The name of the vector store.""" + + object: Literal["vector_store"] + """The object type, which is always `vector_store`.""" + + status: Literal["expired", "in_progress", "completed"] + """ + The status of the vector store, which can be either `expired`, `in_progress`, or + `completed`. A status of `completed` indicates that the vector store is ready + for use. + """ + + usage_bytes: int + """The total number of bytes used by the files in the vector store.""" + + expires_after: Optional[ExpiresAfter] = None + """The expiration policy for a vector store.""" + + expires_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the vector store will expire.""" diff --git a/portkey_ai/_vendor/openai/types/beta/vector_store_create_params.py b/portkey_ai/_vendor/openai/types/beta/vector_store_create_params.py new file mode 100644 index 00000000..f1a3abcb --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/vector_store_create_params.py @@ -0,0 +1,42 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import List, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["VectorStoreCreateParams", "ExpiresAfter"] + + +class VectorStoreCreateParams(TypedDict, total=False): + expires_after: ExpiresAfter + """The expiration policy for a vector store.""" + + file_ids: List[str] + """ + A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + """ + + metadata: Optional[object] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + name: str + """The name of the vector store.""" + + +class ExpiresAfter(TypedDict, total=False): + anchor: Required[Literal["last_active_at"]] + """Anchor timestamp after which the expiration policy applies. + + Supported anchors: `last_active_at`. + """ + + days: Required[int] + """The number of days after the anchor time that the vector store will expire.""" diff --git a/portkey_ai/_vendor/openai/types/beta/vector_store_deleted.py b/portkey_ai/_vendor/openai/types/beta/vector_store_deleted.py new file mode 100644 index 00000000..21ccda1d --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/vector_store_deleted.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["VectorStoreDeleted"] + + +class VectorStoreDeleted(BaseModel): + id: str + + deleted: bool + + object: Literal["vector_store.deleted"] diff --git a/portkey_ai/_vendor/openai/types/beta/vector_store_list_params.py b/portkey_ai/_vendor/openai/types/beta/vector_store_list_params.py new file mode 100644 index 00000000..f39f6726 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/vector_store_list_params.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["VectorStoreListParams"] + + +class VectorStoreListParams(TypedDict, total=False): + after: str + """A cursor for use in pagination. + + `after` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include after=obj_foo in order to fetch the next page of the + list. + """ + + before: str + """A cursor for use in pagination. + + `before` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include before=obj_foo in order to fetch the previous page + of the list. + """ + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 100, and the default is 20. + """ + + order: Literal["asc", "desc"] + """Sort order by the `created_at` timestamp of the objects. + + `asc` for ascending order and `desc` for descending order. 
+ """ diff --git a/portkey_ai/_vendor/openai/types/beta/vector_store_update_params.py b/portkey_ai/_vendor/openai/types/beta/vector_store_update_params.py new file mode 100644 index 00000000..0f9593e4 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/vector_store_update_params.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["VectorStoreUpdateParams", "ExpiresAfter"] + + +class VectorStoreUpdateParams(TypedDict, total=False): + expires_after: Optional[ExpiresAfter] + """The expiration policy for a vector store.""" + + metadata: Optional[object] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + name: Optional[str] + """The name of the vector store.""" + + +class ExpiresAfter(TypedDict, total=False): + anchor: Required[Literal["last_active_at"]] + """Anchor timestamp after which the expiration policy applies. + + Supported anchors: `last_active_at`. + """ + + days: Required[int] + """The number of days after the anchor time that the vector store will expire.""" diff --git a/portkey_ai/_vendor/openai/types/beta/vector_stores/__init__.py b/portkey_ai/_vendor/openai/types/beta/vector_stores/__init__.py new file mode 100644 index 00000000..2f90ad7b --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/vector_stores/__init__.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .file_list_params import FileListParams as FileListParams +from .vector_store_file import VectorStoreFile as VectorStoreFile +from .file_create_params import FileCreateParams as FileCreateParams +from .vector_store_file_batch import VectorStoreFileBatch as VectorStoreFileBatch +from .file_batch_create_params import FileBatchCreateParams as FileBatchCreateParams +from .vector_store_file_deleted import VectorStoreFileDeleted as VectorStoreFileDeleted +from .file_batch_list_files_params import ( + FileBatchListFilesParams as FileBatchListFilesParams, +) diff --git a/portkey_ai/_vendor/openai/types/beta/vector_stores/file_batch_create_params.py b/portkey_ai/_vendor/openai/types/beta/vector_stores/file_batch_create_params.py new file mode 100644 index 00000000..08828297 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/vector_stores/file_batch_create_params.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List +from typing_extensions import Required, TypedDict + +__all__ = ["FileBatchCreateParams"] + + +class FileBatchCreateParams(TypedDict, total=False): + file_ids: Required[List[str]] + """ + A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. 
+ """ diff --git a/portkey_ai/_vendor/openai/types/beta/vector_stores/file_batch_list_files_params.py b/portkey_ai/_vendor/openai/types/beta/vector_stores/file_batch_list_files_params.py new file mode 100644 index 00000000..24dee7d5 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/vector_stores/file_batch_list_files_params.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["FileBatchListFilesParams"] + + +class FileBatchListFilesParams(TypedDict, total=False): + vector_store_id: Required[str] + + after: str + """A cursor for use in pagination. + + `after` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include after=obj_foo in order to fetch the next page of the + list. + """ + + before: str + """A cursor for use in pagination. + + `before` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include before=obj_foo in order to fetch the previous page + of the list. + """ + + filter: Literal["in_progress", "completed", "failed", "cancelled"] + """Filter by file status. + + One of `in_progress`, `completed`, `failed`, `cancelled`. + """ + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 100, and the default is 20. + """ + + order: Literal["asc", "desc"] + """Sort order by the `created_at` timestamp of the objects. + + `asc` for ascending order and `desc` for descending order. + """ diff --git a/portkey_ai/_vendor/openai/types/beta/vector_stores/file_create_params.py b/portkey_ai/_vendor/openai/types/beta/vector_stores/file_create_params.py new file mode 100644 index 00000000..2fee588a --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/vector_stores/file_create_params.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +__all__ = ["FileCreateParams"] + + +class FileCreateParams(TypedDict, total=False): + file_id: Required[str] + """ + A [File](https://platform.openai.com/docs/api-reference/files) ID that the + vector store should use. Useful for tools like `file_search` that can access + files. + """ diff --git a/portkey_ai/_vendor/openai/types/beta/vector_stores/file_list_params.py b/portkey_ai/_vendor/openai/types/beta/vector_stores/file_list_params.py new file mode 100644 index 00000000..23dd7f0d --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/vector_stores/file_list_params.py @@ -0,0 +1,45 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["FileListParams"] + + +class FileListParams(TypedDict, total=False): + after: str + """A cursor for use in pagination. + + `after` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include after=obj_foo in order to fetch the next page of the + list. + """ + + before: str + """A cursor for use in pagination. + + `before` is an object ID that defines your place in the list. 
For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include before=obj_foo in order to fetch the previous page + of the list. + """ + + filter: Literal["in_progress", "completed", "failed", "cancelled"] + """Filter by file status. + + One of `in_progress`, `completed`, `failed`, `cancelled`. + """ + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 100, and the default is 20. + """ + + order: Literal["asc", "desc"] + """Sort order by the `created_at` timestamp of the objects. + + `asc` for ascending order and `desc` for descending order. + """ diff --git a/portkey_ai/_vendor/openai/types/beta/vector_stores/vector_store_file.py b/portkey_ai/_vendor/openai/types/beta/vector_stores/vector_store_file.py new file mode 100644 index 00000000..dd8d925b --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/vector_stores/vector_store_file.py @@ -0,0 +1,56 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["VectorStoreFile", "LastError"] + + +class LastError(BaseModel): + code: Literal[ + "internal_error", "file_not_found", "parsing_error", "unhandled_mime_type" + ] + """One of `server_error` or `rate_limit_exceeded`.""" + + message: str + """A human-readable description of the error.""" + + +class VectorStoreFile(BaseModel): + id: str + """The identifier, which can be referenced in API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the vector store file was created.""" + + last_error: Optional[LastError] = None + """The last error associated with this vector store file. + + Will be `null` if there are no errors. + """ + + object: Literal["vector_store.file"] + """The object type, which is always `vector_store.file`.""" + + status: Literal["in_progress", "completed", "cancelled", "failed"] + """ + The status of the vector store file, which can be either `in_progress`, + `completed`, `cancelled`, or `failed`. The status `completed` indicates that the + vector store file is ready for use. + """ + + usage_bytes: int + """The total vector store usage in bytes. + + Note that this may be different from the original file size. + """ + + vector_store_id: str + """ + The ID of the + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + that the [File](https://platform.openai.com/docs/api-reference/files) is + attached to. + """ diff --git a/portkey_ai/_vendor/openai/types/beta/vector_stores/vector_store_file_batch.py b/portkey_ai/_vendor/openai/types/beta/vector_stores/vector_store_file_batch.py new file mode 100644 index 00000000..df130a58 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/vector_stores/vector_store_file_batch.py @@ -0,0 +1,54 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["VectorStoreFileBatch", "FileCounts"] + + +class FileCounts(BaseModel): + cancelled: int + """The number of files that where cancelled.""" + + completed: int + """The number of files that have been processed.""" + + failed: int + """The number of files that have failed to process.""" + + in_progress: int + """The number of files that are currently being processed.""" + + total: int + """The total number of files.""" + + +class VectorStoreFileBatch(BaseModel): + id: str + """The identifier, which can be referenced in API endpoints.""" + + created_at: int + """ + The Unix timestamp (in seconds) for when the vector store files batch was + created. + """ + + file_counts: FileCounts + + object: Literal["vector_store.files_batch"] + """The object type, which is always `vector_store.file_batch`.""" + + status: Literal["in_progress", "completed", "cancelled", "failed"] + """ + The status of the vector store files batch, which can be either `in_progress`, + `completed`, `cancelled` or `failed`. + """ + + vector_store_id: str + """ + The ID of the + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + that the [File](https://platform.openai.com/docs/api-reference/files) is + attached to. + """ diff --git a/portkey_ai/_vendor/openai/types/beta/vector_stores/vector_store_file_deleted.py b/portkey_ai/_vendor/openai/types/beta/vector_stores/vector_store_file_deleted.py new file mode 100644 index 00000000..ae37f843 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/beta/vector_stores/vector_store_file_deleted.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["VectorStoreFileDeleted"] + + +class VectorStoreFileDeleted(BaseModel): + id: str + + deleted: bool + + object: Literal["vector_store.file.deleted"] diff --git a/portkey_ai/_vendor/openai/types/chat/__init__.py b/portkey_ai/_vendor/openai/types/chat/__init__.py new file mode 100644 index 00000000..5a3c9be1 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/__init__.py @@ -0,0 +1,60 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from .chat_completion import ChatCompletion as ChatCompletion +from .chat_completion_role import ChatCompletionRole as ChatCompletionRole +from .chat_completion_chunk import ChatCompletionChunk as ChatCompletionChunk +from .chat_completion_message import ChatCompletionMessage as ChatCompletionMessage +from .completion_create_params import CompletionCreateParams as CompletionCreateParams +from .chat_completion_tool_param import ( + ChatCompletionToolParam as ChatCompletionToolParam, +) +from .chat_completion_message_param import ( + ChatCompletionMessageParam as ChatCompletionMessageParam, +) +from .chat_completion_token_logprob import ( + ChatCompletionTokenLogprob as ChatCompletionTokenLogprob, +) +from .chat_completion_message_tool_call import ( + ChatCompletionMessageToolCall as ChatCompletionMessageToolCall, +) +from .chat_completion_content_part_param import ( + ChatCompletionContentPartParam as ChatCompletionContentPartParam, +) +from .chat_completion_tool_message_param import ( + ChatCompletionToolMessageParam as ChatCompletionToolMessageParam, +) +from .chat_completion_user_message_param import ( + ChatCompletionUserMessageParam as ChatCompletionUserMessageParam, +) +from .chat_completion_stream_options_param import ( + ChatCompletionStreamOptionsParam as ChatCompletionStreamOptionsParam, +) +from .chat_completion_system_message_param import ( + ChatCompletionSystemMessageParam as ChatCompletionSystemMessageParam, +) +from .chat_completion_function_message_param import ( + ChatCompletionFunctionMessageParam as ChatCompletionFunctionMessageParam, +) +from .chat_completion_assistant_message_param import ( + ChatCompletionAssistantMessageParam as ChatCompletionAssistantMessageParam, +) +from .chat_completion_content_part_text_param import ( + ChatCompletionContentPartTextParam as ChatCompletionContentPartTextParam, +) +from .chat_completion_message_tool_call_param import ( + ChatCompletionMessageToolCallParam as ChatCompletionMessageToolCallParam, +) +from .chat_completion_named_tool_choice_param import ( + ChatCompletionNamedToolChoiceParam as ChatCompletionNamedToolChoiceParam, +) +from .chat_completion_content_part_image_param import ( + ChatCompletionContentPartImageParam as ChatCompletionContentPartImageParam, +) +from .chat_completion_tool_choice_option_param import ( + ChatCompletionToolChoiceOptionParam as ChatCompletionToolChoiceOptionParam, +) +from .chat_completion_function_call_option_param import ( + ChatCompletionFunctionCallOptionParam as ChatCompletionFunctionCallOptionParam, +) diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion.py b/portkey_ai/_vendor/openai/types/chat/chat_completion.py new file mode 100644 index 00000000..875b8e26 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion.py @@ -0,0 +1,69 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from ..completion_usage import CompletionUsage +from .chat_completion_message import ChatCompletionMessage +from .chat_completion_token_logprob import ChatCompletionTokenLogprob + +__all__ = ["ChatCompletion", "Choice", "ChoiceLogprobs"] + + +class ChoiceLogprobs(BaseModel): + content: Optional[List[ChatCompletionTokenLogprob]] = None + """A list of message content tokens with log probability information.""" + + +class Choice(BaseModel): + finish_reason: Literal[ + "stop", "length", "tool_calls", "content_filter", "function_call" + ] + """The reason the model stopped generating tokens. + + This will be `stop` if the model hit a natural stop point or a provided stop + sequence, `length` if the maximum number of tokens specified in the request was + reached, `content_filter` if content was omitted due to a flag from our content + filters, `tool_calls` if the model called a tool, or `function_call` + (deprecated) if the model called a function. + """ + + index: int + """The index of the choice in the list of choices.""" + + logprobs: Optional[ChoiceLogprobs] = None + """Log probability information for the choice.""" + + message: ChatCompletionMessage + """A chat completion message generated by the model.""" + + +class ChatCompletion(BaseModel): + id: str + """A unique identifier for the chat completion.""" + + choices: List[Choice] + """A list of chat completion choices. + + Can be more than one if `n` is greater than 1. + """ + + created: int + """The Unix timestamp (in seconds) of when the chat completion was created.""" + + model: str + """The model used for the chat completion.""" + + object: Literal["chat.completion"] + """The object type, which is always `chat.completion`.""" + + system_fingerprint: Optional[str] = None + """This fingerprint represents the backend configuration that the model runs with. + + Can be used in conjunction with the `seed` request parameter to understand when + backend changes have been made that might impact determinism. + """ + + usage: Optional[CompletionUsage] = None + """Usage statistics for the completion request.""" diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion_assistant_message_param.py b/portkey_ai/_vendor/openai/types/chat/chat_completion_assistant_message_param.py new file mode 100644 index 00000000..e1e39948 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion_assistant_message_param.py @@ -0,0 +1,51 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +from .chat_completion_message_tool_call_param import ChatCompletionMessageToolCallParam + +__all__ = ["ChatCompletionAssistantMessageParam", "FunctionCall"] + + +class FunctionCall(TypedDict, total=False): + arguments: Required[str] + """ + The arguments to call the function with, as generated by the model in JSON + format. Note that the model does not always generate valid JSON, and may + hallucinate parameters not defined by your function schema. Validate the + arguments in your code before calling your function. 
+ """ + + name: Required[str] + """The name of the function to call.""" + + +class ChatCompletionAssistantMessageParam(TypedDict, total=False): + role: Required[Literal["assistant"]] + """The role of the messages author, in this case `assistant`.""" + + content: Optional[str] + """The contents of the assistant message. + + Required unless `tool_calls` or `function_call` is specified. + """ + + function_call: FunctionCall + """Deprecated and replaced by `tool_calls`. + + The name and arguments of a function that should be called, as generated by the + model. + """ + + name: str + """An optional name for the participant. + + Provides the model information to differentiate between participants of the same + role. + """ + + tool_calls: Iterable[ChatCompletionMessageToolCallParam] + """The tool calls generated by the model, such as function calls.""" diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion_chunk.py b/portkey_ai/_vendor/openai/types/chat/chat_completion_chunk.py new file mode 100644 index 00000000..9bbf9152 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion_chunk.py @@ -0,0 +1,140 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from ..completion_usage import CompletionUsage +from .chat_completion_token_logprob import ChatCompletionTokenLogprob + +__all__ = [ + "ChatCompletionChunk", + "Choice", + "ChoiceDelta", + "ChoiceDeltaFunctionCall", + "ChoiceDeltaToolCall", + "ChoiceDeltaToolCallFunction", + "ChoiceLogprobs", +] + + +class ChoiceDeltaFunctionCall(BaseModel): + arguments: Optional[str] = None + """ + The arguments to call the function with, as generated by the model in JSON + format. Note that the model does not always generate valid JSON, and may + hallucinate parameters not defined by your function schema. Validate the + arguments in your code before calling your function. + """ + + name: Optional[str] = None + """The name of the function to call.""" + + +class ChoiceDeltaToolCallFunction(BaseModel): + arguments: Optional[str] = None + """ + The arguments to call the function with, as generated by the model in JSON + format. Note that the model does not always generate valid JSON, and may + hallucinate parameters not defined by your function schema. Validate the + arguments in your code before calling your function. + """ + + name: Optional[str] = None + """The name of the function to call.""" + + +class ChoiceDeltaToolCall(BaseModel): + index: int + + id: Optional[str] = None + """The ID of the tool call.""" + + function: Optional[ChoiceDeltaToolCallFunction] = None + + type: Optional[Literal["function"]] = None + """The type of the tool. Currently, only `function` is supported.""" + + +class ChoiceDelta(BaseModel): + content: Optional[str] = None + """The contents of the chunk message.""" + + function_call: Optional[ChoiceDeltaFunctionCall] = None + """Deprecated and replaced by `tool_calls`. + + The name and arguments of a function that should be called, as generated by the + model. 
+ """ + + role: Optional[Literal["system", "user", "assistant", "tool"]] = None + """The role of the author of this message.""" + + tool_calls: Optional[List[ChoiceDeltaToolCall]] = None + + +class ChoiceLogprobs(BaseModel): + content: Optional[List[ChatCompletionTokenLogprob]] = None + """A list of message content tokens with log probability information.""" + + +class Choice(BaseModel): + delta: ChoiceDelta + """A chat completion delta generated by streamed model responses.""" + + finish_reason: Optional[ + Literal["stop", "length", "tool_calls", "content_filter", "function_call"] + ] = None + """The reason the model stopped generating tokens. + + This will be `stop` if the model hit a natural stop point or a provided stop + sequence, `length` if the maximum number of tokens specified in the request was + reached, `content_filter` if content was omitted due to a flag from our content + filters, `tool_calls` if the model called a tool, or `function_call` + (deprecated) if the model called a function. + """ + + index: int + """The index of the choice in the list of choices.""" + + logprobs: Optional[ChoiceLogprobs] = None + """Log probability information for the choice.""" + + +class ChatCompletionChunk(BaseModel): + id: str + """A unique identifier for the chat completion. Each chunk has the same ID.""" + + choices: List[Choice] + """A list of chat completion choices. + + Can contain more than one elements if `n` is greater than 1. Can also be empty + for the last chunk if you set `stream_options: {"include_usage": true}`. + """ + + created: int + """The Unix timestamp (in seconds) of when the chat completion was created. + + Each chunk has the same timestamp. + """ + + model: str + """The model to generate the completion.""" + + object: Literal["chat.completion.chunk"] + """The object type, which is always `chat.completion.chunk`.""" + + system_fingerprint: Optional[str] = None + """ + This fingerprint represents the backend configuration that the model runs with. + Can be used in conjunction with the `seed` request parameter to understand when + backend changes have been made that might impact determinism. + """ + + usage: Optional[CompletionUsage] = None + """ + An optional field that will only be present when you set + `stream_options: {"include_usage": true}` in your request. When present, it + contains a null value except for the last chunk which contains the token usage + statistics for the entire request. + """ diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion_content_part_image_param.py b/portkey_ai/_vendor/openai/types/chat/chat_completion_content_part_image_param.py new file mode 100644 index 00000000..b1a186aa --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion_content_part_image_param.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionContentPartImageParam", "ImageURL"] + + +class ImageURL(TypedDict, total=False): + url: Required[str] + """Either a URL of the image or the base64 encoded image data.""" + + detail: Literal["auto", "low", "high"] + """Specifies the detail level of the image. + + Learn more in the + [Vision guide](https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding). 
+ """ + + +class ChatCompletionContentPartImageParam(TypedDict, total=False): + image_url: Required[ImageURL] + + type: Required[Literal["image_url"]] + """The type of the content part.""" diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion_content_part_param.py b/portkey_ai/_vendor/openai/types/chat/chat_completion_content_part_param.py new file mode 100644 index 00000000..08adb75c --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion_content_part_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union + +from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam +from .chat_completion_content_part_image_param import ( + ChatCompletionContentPartImageParam, +) + +__all__ = ["ChatCompletionContentPartParam"] + +ChatCompletionContentPartParam = Union[ + ChatCompletionContentPartTextParam, ChatCompletionContentPartImageParam +] diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion_content_part_text_param.py b/portkey_ai/_vendor/openai/types/chat/chat_completion_content_part_text_param.py new file mode 100644 index 00000000..a2707444 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion_content_part_text_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionContentPartTextParam"] + + +class ChatCompletionContentPartTextParam(TypedDict, total=False): + text: Required[str] + """The text content.""" + + type: Required[Literal["text"]] + """The type of the content part.""" diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion_function_call_option_param.py b/portkey_ai/_vendor/openai/types/chat/chat_completion_function_call_option_param.py new file mode 100644 index 00000000..2bc014af --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion_function_call_option_param.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +__all__ = ["ChatCompletionFunctionCallOptionParam"] + + +class ChatCompletionFunctionCallOptionParam(TypedDict, total=False): + name: Required[str] + """The name of the function to call.""" diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion_function_message_param.py b/portkey_ai/_vendor/openai/types/chat/chat_completion_function_message_param.py new file mode 100644 index 00000000..5af12bf9 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion_function_message_param.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionFunctionMessageParam"] + + +class ChatCompletionFunctionMessageParam(TypedDict, total=False): + content: Required[Optional[str]] + """The contents of the function message.""" + + name: Required[str] + """The name of the function to call.""" + + role: Required[Literal["function"]] + """The role of the messages author, in this case `function`.""" diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion_message.py b/portkey_ai/_vendor/openai/types/chat/chat_completion_message.py new file mode 100644 index 00000000..8db7d17d --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion_message.py @@ -0,0 +1,40 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .chat_completion_message_tool_call import ChatCompletionMessageToolCall + +__all__ = ["ChatCompletionMessage", "FunctionCall"] + + +class FunctionCall(BaseModel): + arguments: str + """ + The arguments to call the function with, as generated by the model in JSON + format. Note that the model does not always generate valid JSON, and may + hallucinate parameters not defined by your function schema. Validate the + arguments in your code before calling your function. + """ + + name: str + """The name of the function to call.""" + + +class ChatCompletionMessage(BaseModel): + content: Optional[str] = None + """The contents of the message.""" + + role: Literal["assistant"] + """The role of the author of this message.""" + + function_call: Optional[FunctionCall] = None + """Deprecated and replaced by `tool_calls`. + + The name and arguments of a function that should be called, as generated by the + model. + """ + + tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None + """The tool calls generated by the model, such as function calls.""" diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion_message_param.py b/portkey_ai/_vendor/openai/types/chat/chat_completion_message_param.py new file mode 100644 index 00000000..a3644a53 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion_message_param.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Union + +from .chat_completion_tool_message_param import ChatCompletionToolMessageParam +from .chat_completion_user_message_param import ChatCompletionUserMessageParam +from .chat_completion_system_message_param import ChatCompletionSystemMessageParam +from .chat_completion_function_message_param import ChatCompletionFunctionMessageParam +from .chat_completion_assistant_message_param import ChatCompletionAssistantMessageParam + +__all__ = ["ChatCompletionMessageParam"] + +ChatCompletionMessageParam = Union[ + ChatCompletionSystemMessageParam, + ChatCompletionUserMessageParam, + ChatCompletionAssistantMessageParam, + ChatCompletionToolMessageParam, + ChatCompletionFunctionMessageParam, +] diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion_message_tool_call.py b/portkey_ai/_vendor/openai/types/chat/chat_completion_message_tool_call.py new file mode 100644 index 00000000..4fec6670 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion_message_tool_call.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ChatCompletionMessageToolCall", "Function"] + + +class Function(BaseModel): + arguments: str + """ + The arguments to call the function with, as generated by the model in JSON + format. Note that the model does not always generate valid JSON, and may + hallucinate parameters not defined by your function schema. Validate the + arguments in your code before calling your function. + """ + + name: str + """The name of the function to call.""" + + +class ChatCompletionMessageToolCall(BaseModel): + id: str + """The ID of the tool call.""" + + function: Function + """The function that the model called.""" + + type: Literal["function"] + """The type of the tool. Currently, only `function` is supported.""" diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion_message_tool_call_param.py b/portkey_ai/_vendor/openai/types/chat/chat_completion_message_tool_call_param.py new file mode 100644 index 00000000..f616c363 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion_message_tool_call_param.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionMessageToolCallParam", "Function"] + + +class Function(TypedDict, total=False): + arguments: Required[str] + """ + The arguments to call the function with, as generated by the model in JSON + format. Note that the model does not always generate valid JSON, and may + hallucinate parameters not defined by your function schema. Validate the + arguments in your code before calling your function. + """ + + name: Required[str] + """The name of the function to call.""" + + +class ChatCompletionMessageToolCallParam(TypedDict, total=False): + id: Required[str] + """The ID of the tool call.""" + + function: Required[Function] + """The function that the model called.""" + + type: Required[Literal["function"]] + """The type of the tool. 
Currently, only `function` is supported.""" diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion_named_tool_choice_param.py b/portkey_ai/_vendor/openai/types/chat/chat_completion_named_tool_choice_param.py new file mode 100644 index 00000000..369f8b42 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion_named_tool_choice_param.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionNamedToolChoiceParam", "Function"] + + +class Function(TypedDict, total=False): + name: Required[str] + """The name of the function to call.""" + + +class ChatCompletionNamedToolChoiceParam(TypedDict, total=False): + function: Required[Function] + + type: Required[Literal["function"]] + """The type of the tool. Currently, only `function` is supported.""" diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion_role.py b/portkey_ai/_vendor/openai/types/chat/chat_completion_role.py new file mode 100644 index 00000000..1fd83888 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion_role.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +__all__ = ["ChatCompletionRole"] + +ChatCompletionRole = Literal["system", "user", "assistant", "tool", "function"] diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion_stream_options_param.py b/portkey_ai/_vendor/openai/types/chat/chat_completion_stream_options_param.py new file mode 100644 index 00000000..fbf72918 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion_stream_options_param.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["ChatCompletionStreamOptionsParam"] + + +class ChatCompletionStreamOptionsParam(TypedDict, total=False): + include_usage: bool + """If set, an additional chunk will be streamed before the `data: [DONE]` message. + + The `usage` field on this chunk shows the token usage statistics for the entire + request, and the `choices` field will always be an empty array. All other chunks + will also include a `usage` field, but with a null value. + """ diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion_system_message_param.py b/portkey_ai/_vendor/openai/types/chat/chat_completion_system_message_param.py new file mode 100644 index 00000000..94bb3f63 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion_system_message_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionSystemMessageParam"] + + +class ChatCompletionSystemMessageParam(TypedDict, total=False): + content: Required[str] + """The contents of the system message.""" + + role: Required[Literal["system"]] + """The role of the messages author, in this case `system`.""" + + name: str + """An optional name for the participant. + + Provides the model information to differentiate between participants of the same + role. 
+ """ diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion_token_logprob.py b/portkey_ai/_vendor/openai/types/chat/chat_completion_token_logprob.py new file mode 100644 index 00000000..c69e2589 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion_token_logprob.py @@ -0,0 +1,57 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional + +from ..._models import BaseModel + +__all__ = ["ChatCompletionTokenLogprob", "TopLogprob"] + + +class TopLogprob(BaseModel): + token: str + """The token.""" + + bytes: Optional[List[int]] = None + """A list of integers representing the UTF-8 bytes representation of the token. + + Useful in instances where characters are represented by multiple tokens and + their byte representations must be combined to generate the correct text + representation. Can be `null` if there is no bytes representation for the token. + """ + + logprob: float + """The log probability of this token, if it is within the top 20 most likely + tokens. + + Otherwise, the value `-9999.0` is used to signify that the token is very + unlikely. + """ + + +class ChatCompletionTokenLogprob(BaseModel): + token: str + """The token.""" + + bytes: Optional[List[int]] = None + """A list of integers representing the UTF-8 bytes representation of the token. + + Useful in instances where characters are represented by multiple tokens and + their byte representations must be combined to generate the correct text + representation. Can be `null` if there is no bytes representation for the token. + """ + + logprob: float + """The log probability of this token, if it is within the top 20 most likely + tokens. + + Otherwise, the value `-9999.0` is used to signify that the token is very + unlikely. + """ + + top_logprobs: List[TopLogprob] + """List of the most likely tokens and their log probability, at this token + position. + + In rare cases, there may be fewer than the number of requested `top_logprobs` + returned. + """ diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion_tool_choice_option_param.py b/portkey_ai/_vendor/openai/types/chat/chat_completion_tool_choice_option_param.py new file mode 100644 index 00000000..fc9b6885 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion_tool_choice_option_param.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal + +from .chat_completion_named_tool_choice_param import ChatCompletionNamedToolChoiceParam + +__all__ = ["ChatCompletionToolChoiceOptionParam"] + +ChatCompletionToolChoiceOptionParam = Union[ + Literal["none", "auto", "required"], ChatCompletionNamedToolChoiceParam +] diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion_tool_message_param.py b/portkey_ai/_vendor/openai/types/chat/chat_completion_tool_message_param.py new file mode 100644 index 00000000..5c590e03 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion_tool_message_param.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionToolMessageParam"] + + +class ChatCompletionToolMessageParam(TypedDict, total=False): + content: Required[str] + """The contents of the tool message.""" + + role: Required[Literal["tool"]] + """The role of the messages author, in this case `tool`.""" + + tool_call_id: Required[str] + """Tool call that this message is responding to.""" diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion_tool_param.py b/portkey_ai/_vendor/openai/types/chat/chat_completion_tool_param.py new file mode 100644 index 00000000..0cf6ea72 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion_tool_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from ...types import shared_params + +__all__ = ["ChatCompletionToolParam"] + + +class ChatCompletionToolParam(TypedDict, total=False): + function: Required[shared_params.FunctionDefinition] + + type: Required[Literal["function"]] + """The type of the tool. Currently, only `function` is supported.""" diff --git a/portkey_ai/_vendor/openai/types/chat/chat_completion_user_message_param.py b/portkey_ai/_vendor/openai/types/chat/chat_completion_user_message_param.py new file mode 100644 index 00000000..5c15322a --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/chat_completion_user_message_param.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypedDict + +from .chat_completion_content_part_param import ChatCompletionContentPartParam + +__all__ = ["ChatCompletionUserMessageParam"] + + +class ChatCompletionUserMessageParam(TypedDict, total=False): + content: Required[Union[str, Iterable[ChatCompletionContentPartParam]]] + """The contents of the user message.""" + + role: Required[Literal["user"]] + """The role of the messages author, in this case `user`.""" + + name: str + """An optional name for the participant. + + Provides the model information to differentiate between participants of the same + role. + """ diff --git a/portkey_ai/_vendor/openai/types/chat/completion_create_params.py b/portkey_ai/_vendor/openai/types/chat/completion_create_params.py new file mode 100644 index 00000000..dbcafe2f --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat/completion_create_params.py @@ -0,0 +1,267 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Dict, List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +from ...types import shared_params +from ..chat_model import ChatModel +from .chat_completion_tool_param import ChatCompletionToolParam +from .chat_completion_message_param import ChatCompletionMessageParam +from .chat_completion_stream_options_param import ChatCompletionStreamOptionsParam +from .chat_completion_tool_choice_option_param import ( + ChatCompletionToolChoiceOptionParam, +) +from .chat_completion_function_call_option_param import ( + ChatCompletionFunctionCallOptionParam, +) + +__all__ = [ + "CompletionCreateParamsBase", + "FunctionCall", + "Function", + "ResponseFormat", + "CompletionCreateParamsNonStreaming", + "CompletionCreateParamsStreaming", +] + + +class CompletionCreateParamsBase(TypedDict, total=False): + messages: Required[Iterable[ChatCompletionMessageParam]] + """A list of messages comprising the conversation so far. + + [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). + """ + + model: Required[Union[str, ChatModel]] + """ID of the model to use. + + See the + [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) + table for details on which models work with the Chat API. + """ + + frequency_penalty: Optional[float] + """Number between -2.0 and 2.0. + + Positive values penalize new tokens based on their existing frequency in the + text so far, decreasing the model's likelihood to repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + """ + + function_call: FunctionCall + """Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. `none` means the model + will not call a function and instead generates a message. `auto` means the model + can pick between generating a message or calling a function. Specifying a + particular function via `{"name": "my_function"}` forces the model to call that + function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + """ + + functions: Iterable[Function] + """Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + """ + + logit_bias: Optional[Dict[str, int]] + """Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + """ + + logprobs: Optional[bool] + """Whether to return log probabilities of the output tokens or not. + + If true, returns the log probabilities of each output token returned in the + `content` of `message`. + """ + + max_tokens: Optional[int] + """ + The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. + + The total length of input tokens and generated tokens is limited by the model's + context length. 
+ [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + """ + + n: Optional[int] + """How many chat completion choices to generate for each input message. + + Note that you will be charged based on the number of generated tokens across all + of the choices. Keep `n` as `1` to minimize costs. + """ + + presence_penalty: Optional[float] + """Number between -2.0 and 2.0. + + Positive values penalize new tokens based on whether they appear in the text so + far, increasing the model's likelihood to talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + """ + + response_format: ResponseFormat + """An object specifying the format that the model must output. + + Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + """ + + seed: Optional[int] + """ + This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + """ + + stop: Union[Optional[str], List[str]] + """Up to 4 sequences where the API will stop generating further tokens.""" + + stream_options: Optional[ChatCompletionStreamOptionsParam] + """Options for streaming response. Only set this when you set `stream: true`.""" + + temperature: Optional[float] + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + """ + + tool_choice: ChatCompletionToolChoiceOptionParam + """ + Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + """ + + tools: Iterable[ChatCompletionToolParam] + """A list of tools the model may call. + + Currently, only functions are supported as a tool. Use this to provide a list of + functions the model may generate JSON inputs for. A max of 128 functions are + supported. 
+ """ + + top_logprobs: Optional[int] + """ + An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + """ + + top_p: Optional[float] + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + """ + + user: str + """ + A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + """ + + +FunctionCall = Union[Literal["none", "auto"], ChatCompletionFunctionCallOptionParam] + + +class Function(TypedDict, total=False): + name: Required[str] + """The name of the function to be called. + + Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length + of 64. + """ + + description: str + """ + A description of what the function does, used by the model to choose when and + how to call the function. + """ + + parameters: shared_params.FunctionParameters + """The parameters the functions accepts, described as a JSON Schema object. + + See the + [guide](https://platform.openai.com/docs/guides/text-generation/function-calling) + for examples, and the + [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for + documentation about the format. + + Omitting `parameters` defines a function with an empty parameter list. + """ + + +class ResponseFormat(TypedDict, total=False): + type: Literal["text", "json_object"] + """Must be one of `text` or `json_object`.""" + + +class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase): + stream: Optional[Literal[False]] + """If set, partial message deltas will be sent, like in ChatGPT. + + Tokens will be sent as data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + """ + + +class CompletionCreateParamsStreaming(CompletionCreateParamsBase): + stream: Required[Literal[True]] + """If set, partial message deltas will be sent, like in ChatGPT. + + Tokens will be sent as data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + """ + + +CompletionCreateParams = Union[ + CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming +] diff --git a/portkey_ai/_vendor/openai/types/chat_model.py b/portkey_ai/_vendor/openai/types/chat_model.py new file mode 100644 index 00000000..219dab51 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/chat_model.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +__all__ = ["ChatModel"] + +ChatModel = Literal[ + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", +] diff --git a/portkey_ai/_vendor/openai/types/completion.py b/portkey_ai/_vendor/openai/types/completion.py new file mode 100644 index 00000000..d3b3102a --- /dev/null +++ b/portkey_ai/_vendor/openai/types/completion.py @@ -0,0 +1,37 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from .._models import BaseModel +from .completion_usage import CompletionUsage +from .completion_choice import CompletionChoice + +__all__ = ["Completion"] + + +class Completion(BaseModel): + id: str + """A unique identifier for the completion.""" + + choices: List[CompletionChoice] + """The list of completion choices the model generated for the input prompt.""" + + created: int + """The Unix timestamp (in seconds) of when the completion was created.""" + + model: str + """The model used for completion.""" + + object: Literal["text_completion"] + """The object type, which is always "text_completion" """ + + system_fingerprint: Optional[str] = None + """This fingerprint represents the backend configuration that the model runs with. + + Can be used in conjunction with the `seed` request parameter to understand when + backend changes have been made that might impact determinism. + """ + + usage: Optional[CompletionUsage] = None + """Usage statistics for the completion request.""" diff --git a/portkey_ai/_vendor/openai/types/completion_choice.py b/portkey_ai/_vendor/openai/types/completion_choice.py new file mode 100644 index 00000000..d948ebc9 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/completion_choice.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Optional +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["CompletionChoice", "Logprobs"] + + +class Logprobs(BaseModel): + text_offset: Optional[List[int]] = None + + token_logprobs: Optional[List[float]] = None + + tokens: Optional[List[str]] = None + + top_logprobs: Optional[List[Dict[str, float]]] = None + + +class CompletionChoice(BaseModel): + finish_reason: Literal["stop", "length", "content_filter"] + """The reason the model stopped generating tokens. + + This will be `stop` if the model hit a natural stop point or a provided stop + sequence, `length` if the maximum number of tokens specified in the request was + reached, or `content_filter` if content was omitted due to a flag from our + content filters. + """ + + index: int + + logprobs: Optional[Logprobs] = None + + text: str diff --git a/portkey_ai/_vendor/openai/types/completion_create_params.py b/portkey_ai/_vendor/openai/types/completion_create_params.py new file mode 100644 index 00000000..9c36b075 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/completion_create_params.py @@ -0,0 +1,197 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Dict, List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +from .chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam + +__all__ = [ + "CompletionCreateParamsBase", + "CompletionCreateParamsNonStreaming", + "CompletionCreateParamsStreaming", +] + + +class CompletionCreateParamsBase(TypedDict, total=False): + model: Required[ + Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]] + ] + """ID of the model to use. + + You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. + """ + + prompt: Required[ + Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None] + ] + """ + The prompt(s) to generate completions for, encoded as a string, array of + strings, array of tokens, or array of token arrays. + + Note that <|endoftext|> is the document separator that the model sees during + training, so if a prompt is not specified the model will generate as if from the + beginning of a new document. + """ + + best_of: Optional[int] + """ + Generates `best_of` completions server-side and returns the "best" (the one with + the highest log probability per token). Results cannot be streamed. + + When used with `n`, `best_of` controls the number of candidate completions and + `n` specifies how many to return – `best_of` must be greater than `n`. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + """ + + echo: Optional[bool] + """Echo back the prompt in addition to the completion""" + + frequency_penalty: Optional[float] + """Number between -2.0 and 2.0. + + Positive values penalize new tokens based on their existing frequency in the + text so far, decreasing the model's likelihood to repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + """ + + logit_bias: Optional[Dict[str, int]] + """Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the GPT + tokenizer) to an associated bias value from -100 to 100. You can use this + [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. + Mathematically, the bias is added to the logits generated by the model prior to + sampling. The exact effect will vary per model, but values between -1 and 1 + should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. + + As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token + from being generated. + """ + + logprobs: Optional[int] + """ + Include the log probabilities on the `logprobs` most likely output tokens, as + well the chosen tokens. For example, if `logprobs` is 5, the API will return a + list of the 5 most likely tokens. The API will always return the `logprob` of + the sampled token, so there may be up to `logprobs+1` elements in the response. + + The maximum value for `logprobs` is 5. 
+ """ + + max_tokens: Optional[int] + """ + The maximum number of [tokens](/tokenizer) that can be generated in the + completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's + context length. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + """ + + n: Optional[int] + """How many completions to generate for each prompt. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + """ + + presence_penalty: Optional[float] + """Number between -2.0 and 2.0. + + Positive values penalize new tokens based on whether they appear in the text so + far, increasing the model's likelihood to talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + """ + + seed: Optional[int] + """ + If specified, our system will make a best effort to sample deterministically, + such that repeated requests with the same `seed` and parameters should return + the same result. + + Determinism is not guaranteed, and you should refer to the `system_fingerprint` + response parameter to monitor changes in the backend. + """ + + stop: Union[Optional[str], List[str], None] + """Up to 4 sequences where the API will stop generating further tokens. + + The returned text will not contain the stop sequence. + """ + + stream_options: Optional[ChatCompletionStreamOptionsParam] + """Options for streaming response. Only set this when you set `stream: true`.""" + + suffix: Optional[str] + """The suffix that comes after a completion of inserted text. + + This parameter is only supported for `gpt-3.5-turbo-instruct`. + """ + + temperature: Optional[float] + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + """ + + top_p: Optional[float] + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + """ + + user: str + """ + A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + """ + + +class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase): + stream: Optional[Literal[False]] + """Whether to stream back partial progress. + + If set, tokens will be sent as data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + """ + + +class CompletionCreateParamsStreaming(CompletionCreateParamsBase): + stream: Required[Literal[True]] + """Whether to stream back partial progress. 
+ + If set, tokens will be sent as data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + """ + + +CompletionCreateParams = Union[ + CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming +] diff --git a/portkey_ai/_vendor/openai/types/completion_usage.py b/portkey_ai/_vendor/openai/types/completion_usage.py new file mode 100644 index 00000000..ac09afd4 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/completion_usage.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + +from .._models import BaseModel + +__all__ = ["CompletionUsage"] + + +class CompletionUsage(BaseModel): + completion_tokens: int + """Number of tokens in the generated completion.""" + + prompt_tokens: int + """Number of tokens in the prompt.""" + + total_tokens: int + """Total number of tokens used in the request (prompt + completion).""" diff --git a/portkey_ai/_vendor/openai/types/create_embedding_response.py b/portkey_ai/_vendor/openai/types/create_embedding_response.py new file mode 100644 index 00000000..eff247a1 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/create_embedding_response.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List +from typing_extensions import Literal + +from .._models import BaseModel +from .embedding import Embedding + +__all__ = ["CreateEmbeddingResponse", "Usage"] + + +class Usage(BaseModel): + prompt_tokens: int + """The number of tokens used by the prompt.""" + + total_tokens: int + """The total number of tokens used by the request.""" + + +class CreateEmbeddingResponse(BaseModel): + data: List[Embedding] + """The list of embeddings generated by the model.""" + + model: str + """The name of the model used to generate the embedding.""" + + object: Literal["list"] + """The object type, which is always "list".""" + + usage: Usage + """The usage information for the request.""" diff --git a/portkey_ai/_vendor/openai/types/embedding.py b/portkey_ai/_vendor/openai/types/embedding.py new file mode 100644 index 00000000..769b1d16 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/embedding.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["Embedding"] + + +class Embedding(BaseModel): + embedding: List[float] + """The embedding vector, which is a list of floats. + + The length of vector depends on the model as listed in the + [embedding guide](https://platform.openai.com/docs/guides/embeddings). + """ + + index: int + """The index of the embedding in the list of embeddings.""" + + object: Literal["embedding"] + """The object type, which is always "embedding".""" diff --git a/portkey_ai/_vendor/openai/types/embedding_create_params.py b/portkey_ai/_vendor/openai/types/embedding_create_params.py new file mode 100644 index 00000000..008dd0a4 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/embedding_create_params.py @@ -0,0 +1,59 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import List, Union, Iterable +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["EmbeddingCreateParams"] + + +class EmbeddingCreateParams(TypedDict, total=False): + input: Required[Union[str, List[str], Iterable[int], Iterable[Iterable[int]]]] + """Input text to embed, encoded as a string or array of tokens. + + To embed multiple inputs in a single request, pass an array of strings or array + of token arrays. The input must not exceed the max input tokens for the model + (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any + array must be 2048 dimensions or less. + [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) + for counting tokens. + """ + + model: Required[ + Union[ + str, + Literal[ + "text-embedding-ada-002", + "text-embedding-3-small", + "text-embedding-3-large", + ], + ] + ] + """ID of the model to use. + + You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. + """ + + dimensions: int + """The number of dimensions the resulting output embeddings should have. + + Only supported in `text-embedding-3` and later models. + """ + + encoding_format: Literal["float", "base64"] + """The format to return the embeddings in. + + Can be either `float` or [`base64`](https://pypi.org/project/pybase64/). + """ + + user: str + """ + A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + """ diff --git a/portkey_ai/_vendor/openai/types/file_content.py b/portkey_ai/_vendor/openai/types/file_content.py new file mode 100644 index 00000000..b4aa08a9 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/file_content.py @@ -0,0 +1,6 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + +__all__ = ["FileContent"] + +FileContent = str diff --git a/portkey_ai/_vendor/openai/types/file_create_params.py b/portkey_ai/_vendor/openai/types/file_create_params.py new file mode 100644 index 00000000..26e2da33 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/file_create_params.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from .._types import FileTypes + +__all__ = ["FileCreateParams"] + + +class FileCreateParams(TypedDict, total=False): + file: Required[FileTypes] + """The File object (not file name) to be uploaded.""" + + purpose: Required[Literal["fine-tune", "assistants"]] + """The intended purpose of the uploaded file. + + Use "fine-tune" for + [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning) and + "assistants" for + [Assistants](https://platform.openai.com/docs/api-reference/assistants) and + [Messages](https://platform.openai.com/docs/api-reference/messages). This allows + us to validate the format of the uploaded file is correct for fine-tuning. 
+ """ diff --git a/portkey_ai/_vendor/openai/types/file_deleted.py b/portkey_ai/_vendor/openai/types/file_deleted.py new file mode 100644 index 00000000..f25fa87a --- /dev/null +++ b/portkey_ai/_vendor/openai/types/file_deleted.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["FileDeleted"] + + +class FileDeleted(BaseModel): + id: str + + deleted: bool + + object: Literal["file"] diff --git a/portkey_ai/_vendor/openai/types/file_list_params.py b/portkey_ai/_vendor/openai/types/file_list_params.py new file mode 100644 index 00000000..212eca13 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/file_list_params.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["FileListParams"] + + +class FileListParams(TypedDict, total=False): + purpose: str + """Only return files with the given purpose.""" diff --git a/portkey_ai/_vendor/openai/types/file_object.py b/portkey_ai/_vendor/openai/types/file_object.py new file mode 100644 index 00000000..49709150 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/file_object.py @@ -0,0 +1,48 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["FileObject"] + + +class FileObject(BaseModel): + id: str + """The file identifier, which can be referenced in the API endpoints.""" + + bytes: int + """The size of the file, in bytes.""" + + created_at: int + """The Unix timestamp (in seconds) for when the file was created.""" + + filename: str + """The name of the file.""" + + object: Literal["file"] + """The object type, which is always `file`.""" + + purpose: Literal[ + "fine-tune", "fine-tune-results", "assistants", "assistants_output" + ] + """The intended purpose of the file. + + Supported values are `fine-tune`, `fine-tune-results`, `assistants`, and + `assistants_output`. + """ + + status: Literal["uploaded", "processed", "error"] + """Deprecated. + + The current status of the file, which can be either `uploaded`, `processed`, or + `error`. + """ + + status_details: Optional[str] = None + """Deprecated. + + For details on why a fine-tuning training file failed validation, see the + `error` field on `fine_tuning.job`. + """ diff --git a/portkey_ai/_vendor/openai/types/fine_tuning/__init__.py b/portkey_ai/_vendor/openai/types/fine_tuning/__init__.py new file mode 100644 index 00000000..3e423e8f --- /dev/null +++ b/portkey_ai/_vendor/openai/types/fine_tuning/__init__.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from .fine_tuning_job import FineTuningJob as FineTuningJob +from .job_list_params import JobListParams as JobListParams +from .job_create_params import JobCreateParams as JobCreateParams +from .fine_tuning_job_event import FineTuningJobEvent as FineTuningJobEvent +from .job_list_events_params import JobListEventsParams as JobListEventsParams +from .fine_tuning_job_integration import ( + FineTuningJobIntegration as FineTuningJobIntegration, +) +from .fine_tuning_job_wandb_integration import ( + FineTuningJobWandbIntegration as FineTuningJobWandbIntegration, +) +from .fine_tuning_job_wandb_integration_object import ( + FineTuningJobWandbIntegrationObject as FineTuningJobWandbIntegrationObject, +) diff --git a/portkey_ai/_vendor/openai/types/fine_tuning/fine_tuning_job.py b/portkey_ai/_vendor/openai/types/fine_tuning/fine_tuning_job.py new file mode 100644 index 00000000..d02c9d8a --- /dev/null +++ b/portkey_ai/_vendor/openai/types/fine_tuning/fine_tuning_job.py @@ -0,0 +1,124 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .fine_tuning_job_wandb_integration_object import ( + FineTuningJobWandbIntegrationObject, +) + +__all__ = ["FineTuningJob", "Error", "Hyperparameters"] + + +class Error(BaseModel): + code: str + """A machine-readable error code.""" + + message: str + """A human-readable error message.""" + + param: Optional[str] = None + """The parameter that was invalid, usually `training_file` or `validation_file`. + + This field will be null if the failure was not parameter-specific. + """ + + +class Hyperparameters(BaseModel): + n_epochs: Union[Literal["auto"], int] + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. "auto" decides + the optimal number of epochs based on the size of the dataset. If setting the + number manually, we support any number between 1 and 50 epochs. + """ + + +class FineTuningJob(BaseModel): + id: str + """The object identifier, which can be referenced in the API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the fine-tuning job was created.""" + + error: Optional[Error] = None + """ + For fine-tuning jobs that have `failed`, this will contain more information on + the cause of the failure. + """ + + fine_tuned_model: Optional[str] = None + """The name of the fine-tuned model that is being created. + + The value will be null if the fine-tuning job is still running. + """ + + finished_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the fine-tuning job was finished. + + The value will be null if the fine-tuning job is still running. + """ + + hyperparameters: Hyperparameters + """The hyperparameters used for the fine-tuning job. + + See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) + for more details. + """ + + model: str + """The base model that is being fine-tuned.""" + + object: Literal["fine_tuning.job"] + """The object type, which is always "fine_tuning.job".""" + + organization_id: str + """The organization that owns the fine-tuning job.""" + + result_files: List[str] + """The compiled results file ID(s) for the fine-tuning job. + + You can retrieve the results with the + [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). 
+ """ + + seed: int + """The seed used for the fine-tuning job.""" + + status: Literal[ + "validating_files", "queued", "running", "succeeded", "failed", "cancelled" + ] + """ + The current status of the fine-tuning job, which can be either + `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`. + """ + + trained_tokens: Optional[int] = None + """The total number of billable tokens processed by this fine-tuning job. + + The value will be null if the fine-tuning job is still running. + """ + + training_file: str + """The file ID used for training. + + You can retrieve the training data with the + [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). + """ + + validation_file: Optional[str] = None + """The file ID used for validation. + + You can retrieve the validation results with the + [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). + """ + + estimated_finish: Optional[int] = None + """ + The Unix timestamp (in seconds) for when the fine-tuning job is estimated to + finish. The value will be null if the fine-tuning job is not running. + """ + + integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None + """A list of integrations to enable for this fine-tuning job.""" diff --git a/portkey_ai/_vendor/openai/types/fine_tuning/fine_tuning_job_event.py b/portkey_ai/_vendor/openai/types/fine_tuning/fine_tuning_job_event.py new file mode 100644 index 00000000..2d204bb9 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/fine_tuning/fine_tuning_job_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["FineTuningJobEvent"] + + +class FineTuningJobEvent(BaseModel): + id: str + + created_at: int + + level: Literal["info", "warn", "error"] + + message: str + + object: Literal["fine_tuning.job.event"] diff --git a/portkey_ai/_vendor/openai/types/fine_tuning/fine_tuning_job_integration.py b/portkey_ai/_vendor/openai/types/fine_tuning/fine_tuning_job_integration.py new file mode 100644 index 00000000..bcfda0ce --- /dev/null +++ b/portkey_ai/_vendor/openai/types/fine_tuning/fine_tuning_job_integration.py @@ -0,0 +1,8 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + +from .fine_tuning_job_wandb_integration_object import ( + FineTuningJobWandbIntegrationObject, +) + +FineTuningJobIntegration = FineTuningJobWandbIntegrationObject diff --git a/portkey_ai/_vendor/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py b/portkey_ai/_vendor/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py new file mode 100644 index 00000000..4ac282eb --- /dev/null +++ b/portkey_ai/_vendor/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional + +from ..._models import BaseModel + +__all__ = ["FineTuningJobWandbIntegration"] + + +class FineTuningJobWandbIntegration(BaseModel): + project: str + """The name of the project that the new run will be created under.""" + + entity: Optional[str] = None + """The entity to use for the run. + + This allows you to set the team or username of the WandB user that you would + like associated with the run. If not set, the default entity for the registered + WandB API key is used. 
+ """ + + name: Optional[str] = None + """A display name to set for the run. + + If not set, we will use the Job ID as the name. + """ + + tags: Optional[List[str]] = None + """A list of tags to be attached to the newly created run. + + These tags are passed through directly to WandB. Some default tags are generated + by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}". + """ diff --git a/portkey_ai/_vendor/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py b/portkey_ai/_vendor/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py new file mode 100644 index 00000000..5b94354d --- /dev/null +++ b/portkey_ai/_vendor/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from .fine_tuning_job_wandb_integration import FineTuningJobWandbIntegration + +__all__ = ["FineTuningJobWandbIntegrationObject"] + + +class FineTuningJobWandbIntegrationObject(BaseModel): + type: Literal["wandb"] + """The type of the integration being enabled for the fine-tuning job""" + + wandb: FineTuningJobWandbIntegration + """The settings for your integration with Weights and Biases. + + This payload specifies the project that metrics will be sent to. Optionally, you + can set an explicit display name for your run, add tags to your run, and set a + default entity (team, username, etc) to be associated with your run. + """ diff --git a/portkey_ai/_vendor/openai/types/fine_tuning/job_create_params.py b/portkey_ai/_vendor/openai/types/fine_tuning/job_create_params.py new file mode 100644 index 00000000..1925f90d --- /dev/null +++ b/portkey_ai/_vendor/openai/types/fine_tuning/job_create_params.py @@ -0,0 +1,131 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["JobCreateParams", "Hyperparameters", "Integration", "IntegrationWandb"] + + +class JobCreateParams(TypedDict, total=False): + model: Required[Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]]] + """The name of the model to fine-tune. + + You can select one of the + [supported models](https://platform.openai.com/docs/guides/fine-tuning/what-models-can-be-fine-tuned). + """ + + training_file: Required[str] + """The ID of an uploaded file that contains training data. + + See [upload file](https://platform.openai.com/docs/api-reference/files/create) + for how to upload a file. + + Your dataset must be formatted as a JSONL file. Additionally, you must upload + your file with the purpose `fine-tune`. + + See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) + for more details. + """ + + hyperparameters: Hyperparameters + """The hyperparameters used for the fine-tuning job.""" + + integrations: Optional[Iterable[Integration]] + """A list of integrations to enable for your fine-tuning job.""" + + seed: Optional[int] + """The seed controls the reproducibility of the job. + + Passing in the same seed and job parameters should produce the same results, but + may differ in rare cases. If a seed is not specified, one will be generated for + you. + """ + + suffix: Optional[str] + """ + A string of up to 18 characters that will be added to your fine-tuned model + name. 
+ + For example, a `suffix` of "custom-model-name" would produce a model name like + `ft:gpt-3.5-turbo:openai:custom-model-name:7p4lURel`. + """ + + validation_file: Optional[str] + """The ID of an uploaded file that contains validation data. + + If you provide this file, the data is used to generate validation metrics + periodically during fine-tuning. These metrics can be viewed in the fine-tuning + results file. The same data should not be present in both train and validation + files. + + Your dataset must be formatted as a JSONL file. You must upload your file with + the purpose `fine-tune`. + + See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) + for more details. + """ + + +class Hyperparameters(TypedDict, total=False): + batch_size: Union[Literal["auto"], int] + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + learning_rate_multiplier: Union[Literal["auto"], float] + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int] + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ + + +class IntegrationWandb(TypedDict, total=False): + project: Required[str] + """The name of the project that the new run will be created under.""" + + entity: Optional[str] + """The entity to use for the run. + + This allows you to set the team or username of the WandB user that you would + like associated with the run. If not set, the default entity for the registered + WandB API key is used. + """ + + name: Optional[str] + """A display name to set for the run. + + If not set, we will use the Job ID as the name. + """ + + tags: List[str] + """A list of tags to be attached to the newly created run. + + These tags are passed through directly to WandB. Some default tags are generated + by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}". + """ + + +class Integration(TypedDict, total=False): + type: Required[Literal["wandb"]] + """The type of integration to enable. + + Currently, only "wandb" (Weights and Biases) is supported. + """ + + wandb: Required[IntegrationWandb] + """The settings for your integration with Weights and Biases. + + This payload specifies the project that metrics will be sent to. Optionally, you + can set an explicit display name for your run, add tags to your run, and set a + default entity (team, username, etc) to be associated with your run. + """ diff --git a/portkey_ai/_vendor/openai/types/fine_tuning/job_list_events_params.py b/portkey_ai/_vendor/openai/types/fine_tuning/job_list_events_params.py new file mode 100644 index 00000000..e1c9a64d --- /dev/null +++ b/portkey_ai/_vendor/openai/types/fine_tuning/job_list_events_params.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["JobListEventsParams"] + + +class JobListEventsParams(TypedDict, total=False): + after: str + """Identifier for the last event from the previous pagination request.""" + + limit: int + """Number of events to retrieve.""" diff --git a/portkey_ai/_vendor/openai/types/fine_tuning/job_list_params.py b/portkey_ai/_vendor/openai/types/fine_tuning/job_list_params.py new file mode 100644 index 00000000..5c075ca3 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/fine_tuning/job_list_params.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["JobListParams"] + + +class JobListParams(TypedDict, total=False): + after: str + """Identifier for the last job from the previous pagination request.""" + + limit: int + """Number of fine-tuning jobs to retrieve.""" diff --git a/portkey_ai/_vendor/openai/types/fine_tuning/jobs/__init__.py b/portkey_ai/_vendor/openai/types/fine_tuning/jobs/__init__.py new file mode 100644 index 00000000..5433e6ce --- /dev/null +++ b/portkey_ai/_vendor/openai/types/fine_tuning/jobs/__init__.py @@ -0,0 +1,8 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .checkpoint_list_params import CheckpointListParams as CheckpointListParams +from .fine_tuning_job_checkpoint import ( + FineTuningJobCheckpoint as FineTuningJobCheckpoint, +) diff --git a/portkey_ai/_vendor/openai/types/fine_tuning/jobs/checkpoint_list_params.py b/portkey_ai/_vendor/openai/types/fine_tuning/jobs/checkpoint_list_params.py new file mode 100644 index 00000000..adceb3b2 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/fine_tuning/jobs/checkpoint_list_params.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["CheckpointListParams"] + + +class CheckpointListParams(TypedDict, total=False): + after: str + """Identifier for the last checkpoint ID from the previous pagination request.""" + + limit: int + """Number of checkpoints to retrieve.""" diff --git a/portkey_ai/_vendor/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py b/portkey_ai/_vendor/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py new file mode 100644 index 00000000..bd07317a --- /dev/null +++ b/portkey_ai/_vendor/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["FineTuningJobCheckpoint", "Metrics"] + + +class Metrics(BaseModel): + full_valid_loss: Optional[float] = None + + full_valid_mean_token_accuracy: Optional[float] = None + + step: Optional[float] = None + + train_loss: Optional[float] = None + + train_mean_token_accuracy: Optional[float] = None + + valid_loss: Optional[float] = None + + valid_mean_token_accuracy: Optional[float] = None + + +class FineTuningJobCheckpoint(BaseModel): + id: str + """The checkpoint identifier, which can be referenced in the API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the checkpoint was created.""" + + fine_tuned_model_checkpoint: str + """The name of the fine-tuned checkpoint model that is created.""" + + fine_tuning_job_id: str + """The name of the fine-tuning job that this checkpoint was created from.""" + + metrics: Metrics + """Metrics at the step number during the fine-tuning job.""" + + object: Literal["fine_tuning.job.checkpoint"] + """The object type, which is always "fine_tuning.job.checkpoint".""" + + step_number: int + """The step number that the checkpoint was created at.""" diff --git a/portkey_ai/_vendor/openai/types/image.py b/portkey_ai/_vendor/openai/types/image.py new file mode 100644 index 00000000..f48aa2c7 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/image.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from .._models import BaseModel + +__all__ = ["Image"] + + +class Image(BaseModel): + b64_json: Optional[str] = None + """ + The base64-encoded JSON of the generated image, if `response_format` is + `b64_json`. + """ + + revised_prompt: Optional[str] = None + """ + The prompt that was used to generate the image, if there was any revision to the + prompt. + """ + + url: Optional[str] = None + """The URL of the generated image, if `response_format` is `url` (default).""" diff --git a/portkey_ai/_vendor/openai/types/image_create_variation_params.py b/portkey_ai/_vendor/openai/types/image_create_variation_params.py new file mode 100644 index 00000000..25493073 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/image_create_variation_params.py @@ -0,0 +1,50 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Optional +from typing_extensions import Literal, Required, TypedDict + +from .._types import FileTypes + +__all__ = ["ImageCreateVariationParams"] + + +class ImageCreateVariationParams(TypedDict, total=False): + image: Required[FileTypes] + """The image to use as the basis for the variation(s). + + Must be a valid PNG file, less than 4MB, and square. + """ + + model: Union[str, Literal["dall-e-2"], None] + """The model to use for image generation. + + Only `dall-e-2` is supported at this time. + """ + + n: Optional[int] + """The number of images to generate. + + Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported. + """ + + response_format: Optional[Literal["url", "b64_json"]] + """The format in which the generated images are returned. + + Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the + image has been generated. + """ + + size: Optional[Literal["256x256", "512x512", "1024x1024"]] + """The size of the generated images. + + Must be one of `256x256`, `512x512`, or `1024x1024`. 
+ """ + + user: str + """ + A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + """ diff --git a/portkey_ai/_vendor/openai/types/image_edit_params.py b/portkey_ai/_vendor/openai/types/image_edit_params.py new file mode 100644 index 00000000..073456e3 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/image_edit_params.py @@ -0,0 +1,61 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Optional +from typing_extensions import Literal, Required, TypedDict + +from .._types import FileTypes + +__all__ = ["ImageEditParams"] + + +class ImageEditParams(TypedDict, total=False): + image: Required[FileTypes] + """The image to edit. + + Must be a valid PNG file, less than 4MB, and square. If mask is not provided, + image must have transparency, which will be used as the mask. + """ + + prompt: Required[str] + """A text description of the desired image(s). + + The maximum length is 1000 characters. + """ + + mask: FileTypes + """An additional image whose fully transparent areas (e.g. + + where alpha is zero) indicate where `image` should be edited. Must be a valid + PNG file, less than 4MB, and have the same dimensions as `image`. + """ + + model: Union[str, Literal["dall-e-2"], None] + """The model to use for image generation. + + Only `dall-e-2` is supported at this time. + """ + + n: Optional[int] + """The number of images to generate. Must be between 1 and 10.""" + + response_format: Optional[Literal["url", "b64_json"]] + """The format in which the generated images are returned. + + Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the + image has been generated. + """ + + size: Optional[Literal["256x256", "512x512", "1024x1024"]] + """The size of the generated images. + + Must be one of `256x256`, `512x512`, or `1024x1024`. + """ + + user: str + """ + A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + """ diff --git a/portkey_ai/_vendor/openai/types/image_generate_params.py b/portkey_ai/_vendor/openai/types/image_generate_params.py new file mode 100644 index 00000000..18c56f8e --- /dev/null +++ b/portkey_ai/_vendor/openai/types/image_generate_params.py @@ -0,0 +1,63 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ImageGenerateParams"] + + +class ImageGenerateParams(TypedDict, total=False): + prompt: Required[str] + """A text description of the desired image(s). + + The maximum length is 1000 characters for `dall-e-2` and 4000 characters for + `dall-e-3`. + """ + + model: Union[str, Literal["dall-e-2", "dall-e-3"], None] + """The model to use for image generation.""" + + n: Optional[int] + """The number of images to generate. + + Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported. + """ + + quality: Literal["standard", "hd"] + """The quality of the image that will be generated. + + `hd` creates images with finer details and greater consistency across the image. + This param is only supported for `dall-e-3`. 
+ """ + + response_format: Optional[Literal["url", "b64_json"]] + """The format in which the generated images are returned. + + Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the + image has been generated. + """ + + size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] + """The size of the generated images. + + Must be one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`. Must be one + of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3` models. + """ + + style: Optional[Literal["vivid", "natural"]] + """The style of the generated images. + + Must be one of `vivid` or `natural`. Vivid causes the model to lean towards + generating hyper-real and dramatic images. Natural causes the model to produce + more natural, less hyper-real looking images. This param is only supported for + `dall-e-3`. + """ + + user: str + """ + A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + """ diff --git a/portkey_ai/_vendor/openai/types/images_response.py b/portkey_ai/_vendor/openai/types/images_response.py new file mode 100644 index 00000000..7cee8131 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/images_response.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List + +from .image import Image +from .._models import BaseModel + +__all__ = ["ImagesResponse"] + + +class ImagesResponse(BaseModel): + created: int + + data: List[Image] diff --git a/portkey_ai/_vendor/openai/types/model.py b/portkey_ai/_vendor/openai/types/model.py new file mode 100644 index 00000000..2631ee8d --- /dev/null +++ b/portkey_ai/_vendor/openai/types/model.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["Model"] + + +class Model(BaseModel): + id: str + """The model identifier, which can be referenced in the API endpoints.""" + + created: int + """The Unix timestamp (in seconds) when the model was created.""" + + object: Literal["model"] + """The object type, which is always "model".""" + + owned_by: str + """The organization that owns the model.""" diff --git a/portkey_ai/_vendor/openai/types/model_deleted.py b/portkey_ai/_vendor/openai/types/model_deleted.py new file mode 100644 index 00000000..7f81e1b3 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/model_deleted.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + +from .._models import BaseModel + +__all__ = ["ModelDeleted"] + + +class ModelDeleted(BaseModel): + id: str + + deleted: bool + + object: str diff --git a/portkey_ai/_vendor/openai/types/moderation.py b/portkey_ai/_vendor/openai/types/moderation.py new file mode 100644 index 00000000..5aa69182 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/moderation.py @@ -0,0 +1,118 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + +from pydantic import Field as FieldInfo + +from .._models import BaseModel + +__all__ = ["Moderation", "Categories", "CategoryScores"] + + +class Categories(BaseModel): + harassment: bool + """ + Content that expresses, incites, or promotes harassing language towards any + target. 
+ """ + + harassment_threatening: bool = FieldInfo(alias="harassment/threatening") + """ + Harassment content that also includes violence or serious harm towards any + target. + """ + + hate: bool + """ + Content that expresses, incites, or promotes hate based on race, gender, + ethnicity, religion, nationality, sexual orientation, disability status, or + caste. Hateful content aimed at non-protected groups (e.g., chess players) is + harassment. + """ + + hate_threatening: bool = FieldInfo(alias="hate/threatening") + """ + Hateful content that also includes violence or serious harm towards the targeted + group based on race, gender, ethnicity, religion, nationality, sexual + orientation, disability status, or caste. + """ + + self_harm: bool = FieldInfo(alias="self-harm") + """ + Content that promotes, encourages, or depicts acts of self-harm, such as + suicide, cutting, and eating disorders. + """ + + self_harm_instructions: bool = FieldInfo(alias="self-harm/instructions") + """ + Content that encourages performing acts of self-harm, such as suicide, cutting, + and eating disorders, or that gives instructions or advice on how to commit such + acts. + """ + + self_harm_intent: bool = FieldInfo(alias="self-harm/intent") + """ + Content where the speaker expresses that they are engaging or intend to engage + in acts of self-harm, such as suicide, cutting, and eating disorders. + """ + + sexual: bool + """ + Content meant to arouse sexual excitement, such as the description of sexual + activity, or that promotes sexual services (excluding sex education and + wellness). + """ + + sexual_minors: bool = FieldInfo(alias="sexual/minors") + """Sexual content that includes an individual who is under 18 years old.""" + + violence: bool + """Content that depicts death, violence, or physical injury.""" + + violence_graphic: bool = FieldInfo(alias="violence/graphic") + """Content that depicts death, violence, or physical injury in graphic detail.""" + + +class CategoryScores(BaseModel): + harassment: float + """The score for the category 'harassment'.""" + + harassment_threatening: float = FieldInfo(alias="harassment/threatening") + """The score for the category 'harassment/threatening'.""" + + hate: float + """The score for the category 'hate'.""" + + hate_threatening: float = FieldInfo(alias="hate/threatening") + """The score for the category 'hate/threatening'.""" + + self_harm: float = FieldInfo(alias="self-harm") + """The score for the category 'self-harm'.""" + + self_harm_instructions: float = FieldInfo(alias="self-harm/instructions") + """The score for the category 'self-harm/instructions'.""" + + self_harm_intent: float = FieldInfo(alias="self-harm/intent") + """The score for the category 'self-harm/intent'.""" + + sexual: float + """The score for the category 'sexual'.""" + + sexual_minors: float = FieldInfo(alias="sexual/minors") + """The score for the category 'sexual/minors'.""" + + violence: float + """The score for the category 'violence'.""" + + violence_graphic: float = FieldInfo(alias="violence/graphic") + """The score for the category 'violence/graphic'.""" + + +class Moderation(BaseModel): + categories: Categories + """A list of the categories, and whether they are flagged or not.""" + + category_scores: CategoryScores + """A list of the categories along with their scores as predicted by model.""" + + flagged: bool + """Whether any of the below categories are flagged.""" diff --git a/portkey_ai/_vendor/openai/types/moderation_create_params.py 
b/portkey_ai/_vendor/openai/types/moderation_create_params.py new file mode 100644 index 00000000..d4608def --- /dev/null +++ b/portkey_ai/_vendor/openai/types/moderation_create_params.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ModerationCreateParams"] + + +class ModerationCreateParams(TypedDict, total=False): + input: Required[Union[str, List[str]]] + """The input text to classify""" + + model: Union[str, Literal["text-moderation-latest", "text-moderation-stable"]] + """ + Two content moderations models are available: `text-moderation-stable` and + `text-moderation-latest`. + + The default is `text-moderation-latest` which will be automatically upgraded + over time. This ensures you are always using our most accurate model. If you use + `text-moderation-stable`, we will provide advanced notice before updating the + model. Accuracy of `text-moderation-stable` may be slightly lower than for + `text-moderation-latest`. + """ diff --git a/portkey_ai/_vendor/openai/types/moderation_create_response.py b/portkey_ai/_vendor/openai/types/moderation_create_response.py new file mode 100644 index 00000000..79684f8a --- /dev/null +++ b/portkey_ai/_vendor/openai/types/moderation_create_response.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List + +from .._models import BaseModel +from .moderation import Moderation + +__all__ = ["ModerationCreateResponse"] + + +class ModerationCreateResponse(BaseModel): + id: str + """The unique identifier for the moderation request.""" + + model: str + """The model used to generate the moderation results.""" + + results: List[Moderation] + """A list of moderation objects.""" diff --git a/portkey_ai/_vendor/openai/types/shared/__init__.py b/portkey_ai/_vendor/openai/types/shared/__init__.py new file mode 100644 index 00000000..e085744e --- /dev/null +++ b/portkey_ai/_vendor/openai/types/shared/__init__.py @@ -0,0 +1,5 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .error_object import ErrorObject as ErrorObject +from .function_definition import FunctionDefinition as FunctionDefinition +from .function_parameters import FunctionParameters as FunctionParameters diff --git a/portkey_ai/_vendor/openai/types/shared/error_object.py b/portkey_ai/_vendor/openai/types/shared/error_object.py new file mode 100644 index 00000000..32d7045e --- /dev/null +++ b/portkey_ai/_vendor/openai/types/shared/error_object.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel + +__all__ = ["ErrorObject"] + + +class ErrorObject(BaseModel): + code: Optional[str] = None + + message: str + + param: Optional[str] = None + + type: str diff --git a/portkey_ai/_vendor/openai/types/shared/function_definition.py b/portkey_ai/_vendor/openai/types/shared/function_definition.py new file mode 100644 index 00000000..a39116d6 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/shared/function_definition.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional + +from ..._models import BaseModel +from .function_parameters import FunctionParameters + +__all__ = ["FunctionDefinition"] + + +class FunctionDefinition(BaseModel): + name: str + """The name of the function to be called. + + Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length + of 64. + """ + + description: Optional[str] = None + """ + A description of what the function does, used by the model to choose when and + how to call the function. + """ + + parameters: Optional[FunctionParameters] = None + """The parameters the functions accepts, described as a JSON Schema object. + + See the + [guide](https://platform.openai.com/docs/guides/text-generation/function-calling) + for examples, and the + [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for + documentation about the format. + + Omitting `parameters` defines a function with an empty parameter list. + """ diff --git a/portkey_ai/_vendor/openai/types/shared/function_parameters.py b/portkey_ai/_vendor/openai/types/shared/function_parameters.py new file mode 100644 index 00000000..c9524e4c --- /dev/null +++ b/portkey_ai/_vendor/openai/types/shared/function_parameters.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict + +__all__ = ["FunctionParameters"] + +FunctionParameters = Dict[str, object] diff --git a/portkey_ai/_vendor/openai/types/shared_params/__init__.py b/portkey_ai/_vendor/openai/types/shared_params/__init__.py new file mode 100644 index 00000000..ef638cb2 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/shared_params/__init__.py @@ -0,0 +1,4 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .function_definition import FunctionDefinition as FunctionDefinition +from .function_parameters import FunctionParameters as FunctionParameters diff --git a/portkey_ai/_vendor/openai/types/shared_params/function_definition.py b/portkey_ai/_vendor/openai/types/shared_params/function_definition.py new file mode 100644 index 00000000..58d0203b --- /dev/null +++ b/portkey_ai/_vendor/openai/types/shared_params/function_definition.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +from ...types import shared_params + +__all__ = ["FunctionDefinition"] + + +class FunctionDefinition(TypedDict, total=False): + name: Required[str] + """The name of the function to be called. + + Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length + of 64. + """ + + description: str + """ + A description of what the function does, used by the model to choose when and + how to call the function. + """ + + parameters: shared_params.FunctionParameters + """The parameters the functions accepts, described as a JSON Schema object. + + See the + [guide](https://platform.openai.com/docs/guides/text-generation/function-calling) + for examples, and the + [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for + documentation about the format. + + Omitting `parameters` defines a function with an empty parameter list. 
+ """ diff --git a/portkey_ai/_vendor/openai/types/shared_params/function_parameters.py b/portkey_ai/_vendor/openai/types/shared_params/function_parameters.py new file mode 100644 index 00000000..5b40efb7 --- /dev/null +++ b/portkey_ai/_vendor/openai/types/shared_params/function_parameters.py @@ -0,0 +1,9 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict + +__all__ = ["FunctionParameters"] + +FunctionParameters = Dict[str, object] diff --git a/portkey_ai/_vendor/openai/version.py b/portkey_ai/_vendor/openai/version.py new file mode 100644 index 00000000..01a08ab5 --- /dev/null +++ b/portkey_ai/_vendor/openai/version.py @@ -0,0 +1,3 @@ +from ._version import __version__ + +VERSION: str = __version__ diff --git a/portkey_ai/api_resources/apis/audio.py b/portkey_ai/api_resources/apis/audio.py index e0367b79..c36b8381 100644 --- a/portkey_ai/api_resources/apis/audio.py +++ b/portkey_ai/api_resources/apis/audio.py @@ -1,7 +1,7 @@ import json from typing import Any, List, Union from portkey_ai.api_resources.apis.api_resource import APIResource, AsyncAPIResource -from openai._types import NotGiven, NOT_GIVEN, FileTypes +from ..._vendor.openai._types import NotGiven, NOT_GIVEN, FileTypes from portkey_ai.api_resources.client import AsyncPortkey, Portkey import typing diff --git a/portkey_ai/api_resources/apis/batches.py b/portkey_ai/api_resources/apis/batches.py index 8fd789a3..8f3583ce 100644 --- a/portkey_ai/api_resources/apis/batches.py +++ b/portkey_ai/api_resources/apis/batches.py @@ -3,7 +3,7 @@ import typing from portkey_ai.api_resources.apis.api_resource import APIResource, AsyncAPIResource from portkey_ai.api_resources.client import AsyncPortkey, Portkey -from openai._types import NotGiven, NOT_GIVEN +from ..._vendor.openai._types import NotGiven, NOT_GIVEN from portkey_ai.api_resources.types.batches_type import Batch, BatchList diff --git a/portkey_ai/api_resources/apis/chat_complete.py b/portkey_ai/api_resources/apis/chat_complete.py index 5364a4d3..f3eec7ee 100644 --- a/portkey_ai/api_resources/apis/chat_complete.py +++ b/portkey_ai/api_resources/apis/chat_complete.py @@ -17,7 +17,7 @@ ) from portkey_ai.api_resources.apis.api_resource import APIResource, AsyncAPIResource -from openai._types import NotGiven, NOT_GIVEN +from ..._vendor.openai._types import NotGiven, NOT_GIVEN __all__ = ["ChatCompletion", "AsyncChatCompletion"] diff --git a/portkey_ai/api_resources/apis/complete.py b/portkey_ai/api_resources/apis/complete.py index 9f4150fc..1c8c0e3a 100644 --- a/portkey_ai/api_resources/apis/complete.py +++ b/portkey_ai/api_resources/apis/complete.py @@ -1,7 +1,7 @@ import json from typing import AsyncIterator, Iterator, Optional, Union from portkey_ai.api_resources.client import AsyncPortkey, Portkey -from openai._types import NotGiven, NOT_GIVEN +from ..._vendor.openai._types import NotGiven, NOT_GIVEN # from portkey_ai.api_resources.utils import TextCompletionChunk from portkey_ai.api_resources.types.complete_type import ( diff --git a/portkey_ai/api_resources/apis/embeddings.py b/portkey_ai/api_resources/apis/embeddings.py index 0e112444..a53af750 100644 --- a/portkey_ai/api_resources/apis/embeddings.py +++ b/portkey_ai/api_resources/apis/embeddings.py @@ -4,7 +4,7 @@ from portkey_ai.api_resources.apis.api_resource import APIResource, AsyncAPIResource from portkey_ai.api_resources.client import AsyncPortkey, Portkey from portkey_ai.api_resources.types.embeddings_type import 
CreateEmbeddingResponse -from openai._types import NotGiven, NOT_GIVEN +from ..._vendor.openai._types import NotGiven, NOT_GIVEN class Embeddings(APIResource): diff --git a/portkey_ai/api_resources/apis/fine_tuning.py b/portkey_ai/api_resources/apis/fine_tuning.py index d1be95cc..86d45903 100644 --- a/portkey_ai/api_resources/apis/fine_tuning.py +++ b/portkey_ai/api_resources/apis/fine_tuning.py @@ -2,8 +2,8 @@ from typing import Iterable, Optional, Union from portkey_ai.api_resources.apis.api_resource import APIResource, AsyncAPIResource from portkey_ai.api_resources.client import AsyncPortkey, Portkey -from openai._types import NotGiven, NOT_GIVEN -from openai.types.fine_tuning import job_create_params +from ..._vendor.openai._types import NotGiven, NOT_GIVEN +from ..._vendor.openai.types.fine_tuning import job_create_params from portkey_ai.api_resources.types.fine_tuning_type import ( FineTuningJob, diff --git a/portkey_ai/api_resources/apis/images.py b/portkey_ai/api_resources/apis/images.py index 5b4a6bb8..4212a35f 100644 --- a/portkey_ai/api_resources/apis/images.py +++ b/portkey_ai/api_resources/apis/images.py @@ -4,7 +4,7 @@ from portkey_ai.api_resources.apis.api_resource import APIResource, AsyncAPIResource from portkey_ai.api_resources.client import AsyncPortkey, Portkey from portkey_ai.api_resources.types.image_type import ImagesResponse -from openai._types import NotGiven, NOT_GIVEN +from ..._vendor.openai._types import NotGiven, NOT_GIVEN class Images(APIResource): diff --git a/portkey_ai/api_resources/apis/models.py b/portkey_ai/api_resources/apis/models.py index 26857ed5..a8d2a920 100644 --- a/portkey_ai/api_resources/apis/models.py +++ b/portkey_ai/api_resources/apis/models.py @@ -3,7 +3,7 @@ from portkey_ai.api_resources.apis.api_resource import APIResource, AsyncAPIResource from portkey_ai.api_resources.client import AsyncPortkey, Portkey from portkey_ai.api_resources.types.models_type import Model, ModelDeleted, ModelList -from openai._types import NotGiven, NOT_GIVEN +from ..._vendor.openai._types import NotGiven, NOT_GIVEN class Models(APIResource): diff --git a/portkey_ai/api_resources/apis/moderations.py b/portkey_ai/api_resources/apis/moderations.py index 8b6f55dc..ad7536e7 100644 --- a/portkey_ai/api_resources/apis/moderations.py +++ b/portkey_ai/api_resources/apis/moderations.py @@ -2,7 +2,7 @@ from typing import List, Union from portkey_ai.api_resources.apis.api_resource import APIResource, AsyncAPIResource from portkey_ai.api_resources.client import AsyncPortkey, Portkey -from openai._types import NotGiven, NOT_GIVEN +from ..._vendor.openai._types import NotGiven, NOT_GIVEN from portkey_ai.api_resources.types.moderations_type import ModerationCreateResponse diff --git a/portkey_ai/api_resources/apis/threads.py b/portkey_ai/api_resources/apis/threads.py index a1e1c9c8..5343dec7 100644 --- a/portkey_ai/api_resources/apis/threads.py +++ b/portkey_ai/api_resources/apis/threads.py @@ -16,15 +16,15 @@ RunStepList, ) from portkey_ai.api_resources.types.thread_type import Thread, ThreadDeleted -from openai._types import NotGiven, NOT_GIVEN -from openai.types.beta import thread_create_and_run_params -from openai.types.beta.assistant_response_format_option_param import ( +from ..._vendor.openai._types import NotGiven, NOT_GIVEN +from ..._vendor.openai.types.beta import thread_create_and_run_params +from ..._vendor.openai.types.beta.assistant_response_format_option_param import ( AssistantResponseFormatOptionParam, ) -from 
openai.types.beta.assistant_tool_choice_option_param import ( +from ..._vendor.openai.types.beta.assistant_tool_choice_option_param import ( AssistantToolChoiceOptionParam, ) -from openai.lib.streaming import ( +from ..._vendor.openai.lib.streaming import ( AssistantEventHandler, AssistantEventHandlerT, AssistantStreamManager, @@ -32,11 +32,11 @@ AsyncAssistantEventHandlerT, AsyncAssistantStreamManager, ) -from openai.types.beta.threads import ( +from ..._vendor.openai.types.beta.threads import ( run_create_params, run_submit_tool_outputs_params, ) -from openai.types.beta.assistant_tool_param import AssistantToolParam +from ..._vendor.openai.types.beta.assistant_tool_param import AssistantToolParam class Threads(APIResource): diff --git a/portkey_ai/api_resources/apis/vector_stores.py b/portkey_ai/api_resources/apis/vector_stores.py index 4d9029ae..16d091d9 100644 --- a/portkey_ai/api_resources/apis/vector_stores.py +++ b/portkey_ai/api_resources/apis/vector_stores.py @@ -3,8 +3,8 @@ import typing from portkey_ai.api_resources.apis.api_resource import APIResource, AsyncAPIResource from portkey_ai.api_resources.client import AsyncPortkey, Portkey -from openai._types import NotGiven, NOT_GIVEN, FileTypes -from openai.types.beta import ( +from ..._vendor.openai._types import NotGiven, NOT_GIVEN, FileTypes +from ..._vendor.openai.types.beta import ( vector_store_create_params, vector_store_update_params, ) diff --git a/portkey_ai/api_resources/client.py b/portkey_ai/api_resources/client.py index fe342019..ba219023 100644 --- a/portkey_ai/api_resources/client.py +++ b/portkey_ai/api_resources/client.py @@ -5,7 +5,8 @@ from portkey_ai.api_resources import apis from portkey_ai.api_resources.base_client import APIClient, AsyncAPIClient -from openai import AsyncOpenAI, OpenAI +# from openai import AsyncOpenAI, OpenAI +from .._vendor.openai import OpenAI, AsyncOpenAI from portkey_ai.api_resources.global_constants import ( OPEN_AI_API_KEY, ) diff --git a/portkey_ai/api_resources/types/batches_type.py b/portkey_ai/api_resources/types/batches_type.py index 4837d8ff..0d24b6af 100644 --- a/portkey_ai/api_resources/types/batches_type.py +++ b/portkey_ai/api_resources/types/batches_type.py @@ -5,8 +5,8 @@ from .utils import parse_headers from typing import List, Any from pydantic import BaseModel, PrivateAttr -from openai.types.batch_error import BatchError -from openai.types.batch_request_counts import BatchRequestCounts +from ..._vendor.openai.types.batch_error import BatchError +from ..._vendor.openai.types.batch_request_counts import BatchRequestCounts __all__ = ["Batch", "BatchList", "Errors"] diff --git a/portkey_ai/api_resources/types/fine_tuning_type.py b/portkey_ai/api_resources/types/fine_tuning_type.py index 59316b14..6a74689e 100644 --- a/portkey_ai/api_resources/types/fine_tuning_type.py +++ b/portkey_ai/api_resources/types/fine_tuning_type.py @@ -4,7 +4,7 @@ from .utils import parse_headers from typing import List, Any from pydantic import BaseModel, PrivateAttr -from openai.types.fine_tuning import FineTuningJobWandbIntegrationObject +from ..._vendor.openai.types.fine_tuning import FineTuningJobWandbIntegrationObject __all__ = [ "Error", diff --git a/portkey_ai/api_resources/types/moderations_type.py b/portkey_ai/api_resources/types/moderations_type.py index c85547ae..694251d1 100644 --- a/portkey_ai/api_resources/types/moderations_type.py +++ b/portkey_ai/api_resources/types/moderations_type.py @@ -4,7 +4,7 @@ from .utils import parse_headers from typing import List, Any from pydantic 
import BaseModel, PrivateAttr -from openai.types.moderation import Moderation +from ..._vendor.openai.types.moderation import Moderation __all__ = ["ModerationCreateResponse"] diff --git a/vendorize.toml b/vendorize.toml new file mode 100644 index 00000000..f6624b4b --- /dev/null +++ b/vendorize.toml @@ -0,0 +1,4 @@ +target = "portkey_ai/_vendor" +packages = [ + "openai==1.26.0" +] \ No newline at end of file
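
Editor's note (not part of the patch): vendorize.toml drives the vendoring step, copying openai==1.26.0 into portkey_ai/_vendor (python-vendorize is assumed as the tool), while the hunks above rewrite Portkey's absolute `openai` imports into package-relative ones such as `from ..._vendor.openai._types import NOT_GIVEN`, so the SDK stops depending on whatever openai version happens to be installed in site-packages. Below is a minimal sketch of what those rewritten imports resolve to once portkey_ai is installed with the vendored tree; `show_vendored_version` is a hypothetical helper added only for illustration.

    # Absolute equivalents of the relative imports used in the hunks above.
    from portkey_ai._vendor.openai import OpenAI, AsyncOpenAI         # vendored client classes
    from portkey_ai._vendor.openai._types import NOT_GIVEN, NotGiven  # sentinel types passed through the Portkey APIs
    from portkey_ai._vendor.openai.version import VERSION             # re-exported __version__ of the vendored copy

    def show_vendored_version() -> str:
        """Return the pinned version of the vendored openai package (expected "1.26.0")."""
        return VERSION

    if __name__ == "__main__":
        print(show_vendored_version())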