[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/llm 56f70e460b: Fix URL for Gemini and Vertex, and simplify parsing
From: |
ELPA Syncer |
Subject: |
[elpa] externals/llm 56f70e460b: Fix URL for Gemini and Vertex, and simplify parsing |
Date: |
Sat, 3 Feb 2024 15:58:20 -0500 (EST) |
branch: externals/llm
commit 56f70e460bce5a749c2c961f1f704b9198a6a44e
Author: Andrew Hyatt <ahyatt@gmail.com>
Commit: Andrew Hyatt <ahyatt@gmail.com>
Fix URL for Gemini and Vertex, and simplify parsing
Previously, we were using the non-streaming URL instead of the streaming
URL.
This fixes https://github.com/ahyatt/llm/issues/20.
---
NEWS.org | 1 +
llm-gemini.el | 15 +++++++++------
llm-vertex.el | 48 +++++++++++-------------------------------------
3 files changed, 21 insertions(+), 43 deletions(-)
diff --git a/NEWS.org b/NEWS.org
index ade9e7f72b..dd514663b9 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,6 +1,7 @@
* Version 0.9.1
- Default to the new "text-embedding-3-small" model for Open AI. *Important*:
Anyone who has stored embeddings should either regenerate embeddings
(recommended) or hard-code the old embedding model ("text-embedding-ada-002").
- Fix response breaking when prompts run afoul of Gemini / Vertex's safety
checks.
+- Change Gemini streaming to be the correct URL. This doesn't seem to have an
effect on behavior.
* Version 0.9
- Add =llm-chat-token-limit= to find the token limit based on the model.
- Add request timeout customization.
diff --git a/llm-gemini.el b/llm-gemini.el
index 07b7aaa093..3c80872333 100644
--- a/llm-gemini.el
+++ b/llm-gemini.el
@@ -72,10 +72,13 @@ You can get this at
https://makersuite.google.com/app/apikey."
buf error-callback
'error (llm-vertex--error-message
data))))))
-(defun llm-gemini--chat-url (provider)
- "Return the URL for the chat request, using PROVIDER."
- (format
"https://generativelanguage.googleapis.com/v1beta/models/%s:generateContent?key=%s"
+;; from https://ai.google.dev/tutorials/rest_quickstart
+(defun llm-gemini--chat-url (provider streaming-p)
+ "Return the URL for the chat request, using PROVIDER.
+If STREAMING-P is non-nil, use the streaming endpoint."
+ (format
"https://generativelanguage.googleapis.com/v1beta/models/%s:%s?key=%s"
(llm-gemini-chat-model provider)
+ (if streaming-p "streamGenerateContent" "generateContent")
(llm-gemini-key provider)))
(defun llm-gemini--get-chat-response (response)
@@ -85,7 +88,7 @@ You can get this at https://makersuite.google.com/app/apikey."
(cl-defmethod llm-chat ((provider llm-gemini) prompt)
(let ((response (llm-vertex--get-chat-response-streaming
- (llm-request-sync (llm-gemini--chat-url provider)
+ (llm-request-sync (llm-gemini--chat-url provider nil)
:data (llm-vertex--chat-request-streaming
prompt)))))
(setf (llm-chat-prompt-interactions prompt)
(append (llm-chat-prompt-interactions prompt)
@@ -94,10 +97,10 @@ You can get this at
https://makersuite.google.com/app/apikey."
(cl-defmethod llm-chat-streaming ((provider llm-gemini) prompt
partial-callback response-callback error-callback)
(let ((buf (current-buffer)))
- (llm-request-async (llm-gemini--chat-url provider)
+ (llm-request-async (llm-gemini--chat-url provider t)
:data (llm-vertex--chat-request-streaming prompt)
:on-partial (lambda (partial)
- (when-let ((response
(llm-vertex--get-partial-chat-ui-repsonse partial)))
+ (when-let ((response
(llm-vertex--get-partial-chat-response partial)))
(llm-request-callback-in-buffer buf
partial-callback response)))
:on-success (lambda (data)
(let ((response
(llm-vertex--get-chat-response-streaming data)))
diff --git a/llm-vertex.el b/llm-vertex.el
index a48b4434ef..2427fde5ef 100644
--- a/llm-vertex.el
+++ b/llm-vertex.el
@@ -161,44 +161,18 @@ This handles different kinds of models."
""))
"NOTE: No response was sent back by the LLM, the prompt may have
violated safety checks."))))
-(defun llm-vertex--get-partial-chat-ui-repsonse (response)
- "Return the partial response from as much of RESPONSE as we can parse.
-If the response is not parseable, return nil."
+(defun llm-vertex--get-partial-chat-response (response)
+ "Return the partial response from as much of RESPONSE as we can parse."
(with-temp-buffer
(insert response)
- (let ((start (point-min))
- (end-of-valid-chunk
- (save-excursion
- (goto-char (point-max))
- (search-backward "\n," nil t)
- (point))))
- (when (and start end-of-valid-chunk)
- ;; It'd be nice if our little algorithm always worked, but doesn't, so
let's
- ;; just ignore when it fails. As long as it mostly succeeds, it
should be fine.
- (condition-case nil
- (when-let
- ((json (ignore-errors
- (or
- (json-read-from-string
- (concat
- (buffer-substring-no-properties
- start end-of-valid-chunk)
- ;; Close off the json
- "]"))
- ;; Needed when we only get a promptFeedback back,
- ;; which happens when the prompt violates safety
- ;; checks.
- (json-read-from-string
- (buffer-substring-no-properties
- start (save-excursion
- (goto-char (point-max))
- (search-backward "}" nil t)
- (1+ (point)))))))))
- (llm-vertex--get-chat-response-streaming json))
- (error (message "Unparseable buffer saved to
*llm-vertex-unparseable*")
- (with-current-buffer (get-buffer-create
"*llm-vertex-unparseable*")
- (erase-buffer)
- (insert response))))))))
+ (let ((result ""))
+ ;; We just will parse every line that is "text": "..." and concatenate
them.
+ (save-excursion
+ (goto-char (point-min))
+ (while (re-search-forward (rx (seq (literal "\"text\": ")
+ (group-n 1 ?\" (* any) ?\")
line-end)) nil t)
+ (setq result (concat result (json-read-from-string (match-string
1))))))
+ result)))
(defun llm-vertex--chat-request-streaming (prompt)
"Return an alist with chat input for the streaming API.
@@ -260,7 +234,7 @@ If STREAMING is non-nil, use the URL for the streaming API."
:headers `(("Authorization" . ,(format "Bearer %s"
(llm-vertex-key provider))))
:data (llm-vertex--chat-request-streaming prompt)
:on-partial (lambda (partial)
- (when-let ((response
(llm-vertex--get-partial-chat-ui-repsonse partial)))
+ (when-let ((response
(llm-vertex--get-partial-chat-response partial)))
(llm-request-callback-in-buffer buf
partial-callback response)))
:on-success (lambda (data)
(let ((response
(llm-vertex--get-chat-response-streaming data)))
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [elpa] externals/llm 56f70e460b: Fix URL for Gemini and Vertex, and simplify parsing,
ELPA Syncer <=