Commit
Fixes #140
Showing 5 changed files with 146 additions and 0 deletions.
@@ -0,0 +1,79 @@
#' @include provider-openai.R
#' @include content.R
NULL

#' Chat with a model hosted by vLLM
#'
#' @description
#' [vLLM](https://docs.vllm.ai/en/latest/) is an open-source library that
#' provides an efficient and convenient model server for LLMs. You can use
#' `chat_vllm()` to connect to endpoints powered by vLLM.
#'
#' @inheritParams chat_openai
#' @inherit chat_openai return
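#' @examples
#' \dontrun{
#' # Illustrative sketch only: substitute the URL and model id for your own
#' # deployment, and set the VLLM_KEY environment variable first.
#' chat <- chat_vllm(
#'   base_url = "http://my-vllm.example.com",
#'   model = "my-model-id"
#' )
#' chat$chat("Tell me three jokes about statisticians")
#' }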
#' @export
chat_vllm <- function(base_url,
                      system_prompt = NULL,
                      turns = NULL,
                      model,
                      seed = NULL,
                      api_args = list(),
                      api_key = vllm_key(),
                      echo = NULL) {
  check_string(base_url)
  turns <- normalize_turns(turns, system_prompt)
  check_string(api_key)
  if (missing(model)) {
    models <- vllm_models(base_url, api_key)
    cli::cli_abort(c(
      "Must specify {.arg model}.",
      i = "Available models: {.str {models}}."
    ))
  }
  if (is_testing() && is.null(seed)) {
    # seed is always NULL on this branch, so pin a fixed seed for
    # reproducible tests
    seed <- 1014
  }
  echo <- check_echo(echo)

  provider <- ProviderVllm(
    base_url = base_url,
    model = model,
    seed = seed,
    extra_args = api_args,
    api_key = api_key
  )
  Chat$new(provider = provider, turns = turns, echo = echo)
}

# vLLM exposes an OpenAI-compatible chat API, so the provider simply
# subclasses ProviderOpenAI and overrides only where the two differ
ProviderVllm <- new_class(
  "ProviderVllm",
  parent = ProviderOpenAI,
  package = "elmer"
)

# Just like the OpenAI method, but without the `strict` field
method(as_json, list(ProviderVllm, ToolDef)) <- function(provider, x) {
  list(
    type = "function",
    "function" = compact(list(
      name = x@name,
      description = x@description,
      parameters = as_json(provider, x@arguments)
    ))
  )
}
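
# For illustration, a hypothetical tool definition would serialize to a
# structure of this shape (the names below are placeholders, not part of
# the package):
#
#   list(
#     type = "function",
#     "function" = list(
#       name = "get_weather",
#       description = "Get the current weather",
#       parameters = <JSON schema derived from x@arguments>
#     )
#   )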

vllm_key <- function() {
  key_get("VLLM_KEY")
}
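
# Assuming `key_get()` falls back to environment variables, the key can be
# supplied via ~/.Renviron (illustrative value, not a real key):
#
#   VLLM_KEY=sk-example-not-a-real-key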

vllm_models <- function(base_url, key = vllm_key()) {
  req <- request(base_url)
  req <- req_auth_bearer_token(req, key)
  req <- req_url_path(req, "/v1/models")
  resp <- req_perform(req)
  json <- resp_body_json(resp)

  # Pull the model ids out of the OpenAI-style list response
  map_chr(json$data, "[[", "id")
}