From 0f173bace51c0f68d0dc3667a340355996b418f0 Mon Sep 17 00:00:00 2001 From: Karthik Chikmagalur Date: Mon, 4 Nov 2024 19:00:17 -0800 Subject: [PATCH] gptel-anthropic: Add support for sending PDFs (#459) * gptel-anthropic.el (gptel-make-anthropic, gptel--anthropic-parse-multipart, gptel--anthropic-models): Add support for sending PDFs to the model `claude-3-5-sonnet-20241022'. This is the only model that supports reading PDFs as of now. Cache sent PDFs so that the input cost of reading the PDF in follow-up messages is 90% cheaper. --- gptel-anthropic.el | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/gptel-anthropic.el b/gptel-anthropic.el index 78a28c2..5448543 100644 --- a/gptel-anthropic.el +++ b/gptel-anthropic.el @@ -132,18 +132,31 @@ format." for part in parts for n upfrom 1 with last = (length parts) + with type for text = (plist-get part :text) + for mime = (plist-get part :mime) for media = (plist-get part :media) if text do (and (or (= n 1) (= n last)) (setq text (gptel--trim-prefixes text))) and unless (string-empty-p text) collect `(:type "text" :text ,text) into parts-array end else if media - collect - `(:type "image" + do + (setq type (cond ;Currently supported: Images and PDFs + ((equal (substring mime 0 5) "image") "image") + ;; NOTE: Only Claude 3.5 Sonnet supports PDF documents: + ((equal mime "application/pdf") "document") + (t (error (concat "(gptel-anthropic) Request aborted: " + "trying to send unsupported MIME type %s") + mime)))) + and collect + `(:type ,type :source (:type "base64" :media_type ,(plist-get part :mime) - :data ,(gptel--base64-encode media))) + :data ,(gptel--base64-encode media)) + ;; TODO Make media caching a user option + ,@(and (gptel--model-capable-p 'cache) + '(:cache_control (:type "ephemeral")))) into parts-array finally return (vconcat parts-array))) @@ -186,15 +199,15 @@ files in the context." 
(defconst gptel--anthropic-models '((claude-3-5-sonnet-20241022 :description "Highest level of intelligence and capability" - :capabilities (media tool) - :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp") + :capabilities (media tool cache) + :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp" "application/pdf") :context-window 200 :input-cost 3 :output-cost 15 :cutoff-date "2024-04") (claude-3-5-sonnet-20240620 :description "Highest level of intelligence and capability" - :capabilities (media tool) + :capabilities (media tool cache) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp") :context-window 200 :input-cost 3 @@ -202,7 +215,7 @@ files in the context." :cutoff-date "2024-04") (claude-3-opus-20240229 :description "Top-level performance, intelligence, fluency, and understanding" - :capabilities (media tool) + :capabilities (media tool cache) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp") :context-window 200 :input-cost 15 @@ -262,7 +275,9 @@ sources: (header (lambda () (when-let (key (gptel--get-api-key)) `(("x-api-key" . ,key) - ("anthropic-version" . "2023-06-01"))))) + ("anthropic-version" . "2023-06-01") + ("anthropic-beta" . "pdfs-2024-09-25") + ("anthropic-beta" . "prompt-caching-2024-07-31"))))) (models gptel--anthropic-models) (host "api.anthropic.com") (protocol "https")