[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/llm 56f70e460b: Fix URL for Gemini and Vertex, and simplify parsing
From: |
ELPA Syncer |
Subject: |
[elpa] externals/llm 56f70e460b: Fix URL for Gemini and Vertex, and simplify parsing |
Date: |
Sat, 3 Feb 2024 15:58:20 -0500 (EST) |
branch: externals/llm
commit 56f70e460bce5a749c2c961f1f704b9198a6a44e
Author: Andrew Hyatt <ahyatt@gmail.com>
Commit: Andrew Hyatt <ahyatt@gmail.com>
Fix URL for Gemini and Vertex, and simplify parsing
Previously, we were using the non-streaming URL instead of the streaming
URL.
This fixes https://github.com/ahyatt/llm/issues/20.
---
NEWS.org | 1 +
llm-gemini.el | 15 +++++++++------
llm-vertex.el | 48 +++++++++++-------------------------------------
3 files changed, 21 insertions(+), 43 deletions(-)
diff --git a/NEWS.org b/NEWS.org
index ade9e7f72b..dd514663b9 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,6 +1,7 @@
* Version 0.9.1
- Default to the new "text-embedding-3-small" model for Open AI. *Important*:
Anyone who has stored embeddings should either regenerate embeddings
(recommended) or hard-code the old embedding model ("text-embedding-ada-002").
- Fix response breaking when prompts run afoul of Gemini / Vertex's safety
checks.
+- Change Gemini streaming to be the correct URL. This doesn't seem to have an
effect on behavior.
* Version 0.9
- Add =llm-chat-token-limit= to find the token limit based on the model.
- Add request timeout customization.
diff --git a/llm-gemini.el b/llm-gemini.el
index 07b7aaa093..3c80872333 100644
--- a/llm-gemini.el
+++ b/llm-gemini.el
@@ -72,10 +72,13 @@ You can get this at
https://makersuite.google.com/app/apikey."
buf error-callback
'error (llm-vertex--error-message
data))))))
-(defun llm-gemini--chat-url (provider)
- "Return the URL for the chat request, using PROVIDER."
- (format
"https://generativelanguage.googleapis.com/v1beta/models/%s:generateContent?key=%s"
+;; from https://ai.google.dev/tutorials/rest_quickstart
+(defun llm-gemini--chat-url (provider streaming-p)
+ "Return the URL for the chat request, using PROVIDER.
+If STREAMING-P is non-nil, use the streaming endpoint."
+ (format
"https://generativelanguage.googleapis.com/v1beta/models/%s:%s?key=%s"
(llm-gemini-chat-model provider)
+ (if streaming-p "streamGenerateContent" "generateContent")
(llm-gemini-key provider)))
(defun llm-gemini--get-chat-response (response)
@@ -85,7 +88,7 @@ You can get this at https://makersuite.google.com/app/apikey."
(cl-defmethod llm-chat ((provider llm-gemini) prompt)
(let ((response (llm-vertex--get-chat-response-streaming
- (llm-request-sync (llm-gemini--chat-url provider)
+ (llm-request-sync (llm-gemini--chat-url provider nil)
:data (llm-vertex--chat-request-streaming
prompt)))))
(setf (llm-chat-prompt-interactions prompt)
(append (llm-chat-prompt-interactions prompt)
@@ -94,10 +97,10 @@ You can get this at
https://makersuite.google.com/app/apikey."
(cl-defmethod llm-chat-streaming ((provider llm-gemini) prompt
partial-callback response-callback error-callback)
(let ((buf (current-buffer)))
- (llm-request-async (llm-gemini--chat-url provider)
+ (llm-request-async (llm-gemini--chat-url provider t)
:data (llm-vertex--chat-request-streaming prompt)
:on-partial (lambda (partial)
- (when-let ((response
(llm-vertex--get-partial-chat-ui-repsonse partial)))
+ (when-let ((response
(llm-vertex--get-partial-chat-response partial)))
(llm-request-callback-in-buffer buf
partial-callback response)))
:on-success (lambda (data)
(let ((response
(llm-vertex--get-chat-response-streaming data)))
diff --git a/llm-vertex.el b/llm-vertex.el
index a48b4434ef..2427fde5ef 100644
--- a/llm-vertex.el
+++ b/llm-vertex.el
@@ -161,44 +161,18 @@ This handles different kinds of models."
""))
"NOTE: No response was sent back by the LLM, the prompt may have
violated safety checks."))))
-(defun llm-vertex--get-partial-chat-ui-repsonse (response)
- "Return the partial response from as much of RESPONSE as we can parse.
-If the response is not parseable, return nil."
+(defun llm-vertex--get-partial-chat-response (response)
+ "Return the partial response from as much of RESPONSE as we can parse."
(with-temp-buffer
(insert response)
- (let ((start (point-min))
- (end-of-valid-chunk
- (save-excursion
- (goto-char (point-max))
- (search-backward "\n," nil t)
- (point))))
- (when (and start end-of-valid-chunk)
- ;; It'd be nice if our little algorithm always worked, but doesn't, so
let's
- ;; just ignore when it fails. As long as it mostly succeeds, it
should be fine.
- (condition-case nil
- (when-let
- ((json (ignore-errors
- (or
- (json-read-from-string
- (concat
- (buffer-substring-no-properties
- start end-of-valid-chunk)
- ;; Close off the json
- "]"))
- ;; Needed when we only get a promptFeedback back,
- ;; which happens when the prompt violates safety
- ;; checks.
- (json-read-from-string
- (buffer-substring-no-properties
- start (save-excursion
- (goto-char (point-max))
- (search-backward "}" nil t)
- (1+ (point)))))))))
- (llm-vertex--get-chat-response-streaming json))
- (error (message "Unparseable buffer saved to
*llm-vertex-unparseable*")
- (with-current-buffer (get-buffer-create
"*llm-vertex-unparseable*")
- (erase-buffer)
- (insert response))))))))
+ (let ((result ""))
+ ;; We just will parse every line that is "text": "..." and concatenate
them.
+ (save-excursion
+ (goto-char (point-min))
+ (while (re-search-forward (rx (seq (literal "\"text\": ")
+ (group-n 1 ?\" (* any) ?\")
line-end)) nil t)
+ (setq result (concat result (json-read-from-string (match-string
1))))))
+ result)))
(defun llm-vertex--chat-request-streaming (prompt)
"Return an alist with chat input for the streaming API.
@@ -260,7 +234,7 @@ If STREAMING is non-nil, use the URL for the streaming API."
:headers `(("Authorization" . ,(format "Bearer %s"
(llm-vertex-key provider))))
:data (llm-vertex--chat-request-streaming prompt)
:on-partial (lambda (partial)
- (when-let ((response
(llm-vertex--get-partial-chat-ui-repsonse partial)))
+ (when-let ((response
(llm-vertex--get-partial-chat-response partial)))
(llm-request-callback-in-buffer buf
partial-callback response)))
:on-success (lambda (data)
(let ((response
(llm-vertex--get-chat-response-streaming data)))
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [elpa] externals/llm 56f70e460b: Fix URL for Gemini and Vertex, and simplify parsing,
ELPA Syncer <=