[elpa] externals/llm c55854aa26: Fix issue with missing text in Open AI and Ollama streaming chat


From: ELPA Syncer
Subject: [elpa] externals/llm c55854aa26: Fix issue with missing text in Open AI and Ollama streaming chat
Date: Wed, 1 Nov 2023 00:58:24 -0400 (EDT)

branch: externals/llm
commit c55854aa267df9b0567d7b911b4c10bfeff642a9
Author: Andrew Hyatt <ahyatt@gmail.com>
Commit: Andrew Hyatt <ahyatt@gmail.com>

    Fix issue with missing text in Open AI and Ollama streaming chat
    
    The previous implementation of Open AI and Ollama streaming chat had an
    issue where small bits of text were missing from streaming responses. We
    switch to a different method of processing responses, in which every
    response arrives on its own line and we keep track of the message number
    rather than the buffer position, since positions can shift subtly as
    content appears and disappears in the response buffer.
    
    This fixes https://github.com/s-kostyaev/ellama/issues/8.
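    
    To illustrate the approach (a sketch only, not code from this commit), the
    counting idea boils down to: split the accumulated response into complete
    JSON lines, skip the ones already processed, and remember how many lines
    have been consumed.  The helper name below is hypothetical:
    
        ;; Minimal sketch, assuming newline-delimited JSON objects that each
        ;; carry a `response' field, as in Ollama's streaming format.
        (require 'json)
        (require 'seq)
        
        (defun my/new-streamed-text (response last-count)
          "Return (TEXT . COUNT) of text in RESPONSE after LAST-COUNT lines."
          (let* ((lines (seq-filter (lambda (line) (string-prefix-p "{" line))
                                    (split-string response "\n" t)))
                 (text (mapconcat
                        (lambda (line)
                          (or (assoc-default 'response (json-read-from-string line)) ""))
                        (seq-subseq lines (min last-count (length lines)))
                        "")))
            ;; Counting complete lines, rather than remembering a buffer
            ;; position, is robust to earlier text shifting around.
            (cons text (length lines))))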
---
 NEWS.org      |  1 +
 llm-ollama.el | 44 +++++++++++++++++++++++---------------------
 llm-openai.el | 43 +++++++++++++++++++++++++++++++------------
 3 files changed, 55 insertions(+), 33 deletions(-)

diff --git a/NEWS.org b/NEWS.org
index 332f61a7be..dac773075a 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,6 +1,7 @@
 * Version 0.5.1
 - Implement token counting for Google Cloud Vertex via their API.
 - Fix issue with Google Cloud Vertex erroring on multibyte strings.
+- Fix issue with small bits of missing text in Open AI and Ollama streaming chat.
 * Version 0.5
 - Fixes for conversation context storage, requiring clients to handle ongoing conversations slightly differently.
 - Fixes for proper sync request http error code handling.
diff --git a/llm-ollama.el b/llm-ollama.el
index 624aadcbe0..b36cf8c653 100644
--- a/llm-ollama.el
+++ b/llm-ollama.el
@@ -135,38 +135,40 @@ STREAMING if non-nil, turn on response streaming."
 (defvar-local llm-ollama-current-response ""
   "The response so far from the server.")
 
-(defvar-local llm-ollama-last-position 1
-  "The last position in the streamed response we read until.")
+(defvar-local llm-ollama-last-response 0
+  "The last response number we've read.")
 
 (defun llm-ollama--get-partial-chat-response (response)
   "Return the text in the partial chat response from RESPONSE."
   ;; To begin with, we should still be in the buffer with the actual response.
   (let ((current-response llm-ollama-current-response)
-        (last-position llm-ollama-last-position))
+        (last-response llm-ollama-last-response))
     (with-temp-buffer
       (insert response)
       ;; Responses in ollama are always one per line.
-      (let* ((start-pos (save-excursion (goto-char (1- last-position))
-                                        (when (search-forward-regexp (rx (seq line-start ?{)) nil t)
-                                          (1- (point)))))
-             (end-pos (save-excursion (goto-char (point-max))
-                                      (when (search-backward-regexp (rx (seq "done\":false}" line-end))
-                                                                    start-pos t)
+      (let* ((end-pos (save-excursion (goto-char (point-max))
+                                      (when (search-backward-regexp
+                                             (rx (seq "done\":false}" line-end))
+                                             nil t)
                                         (pos-eol)))))
-        (when (and start-pos end-pos)
-          (setq
-           current-response
-           (concat current-response
-                   (mapconcat
-                    ;; Skip any lines that aren't json objects.
-                    (lambda (line) (when (string-match-p (rx (seq string-start ?{)) line)
-                                     (assoc-default 'response (json-read-from-string line))))
-                    (split-string (buffer-substring-no-properties start-pos end-pos) "\n" t))))
-          (setq last-position (1+ end-pos)))))
+        (when end-pos
+          (let ((all-lines (seq-filter
+                            (lambda (line) (string-match-p (rx (seq string-start ?{)) line))
+                            (split-string (buffer-substring-no-properties 1 end-pos) "\n" t))))
+            (setq
+             current-response
+             (concat current-response
+                     (mapconcat
+                      (lambda (line) (assoc-default 'response (json-read-from-string line)))
+                      ;; Take from response output last-response to the end. This
+                      ;; counts only valid responses, so we need to throw out all
+                      ;; other lines that aren't valid JSON.
+                      (seq-subseq all-lines last-response))))
+            (setq last-response (length all-lines))))))
     ;; If there is no new content, don't manipulate anything.
     (when (> (length current-response) (length llm-ollama-current-response))
-      (setq-local llm-ollama-last-position last-position)
-      (setq-local llm-ollama-current-response current-response))
+      (setq llm-ollama-last-response last-response)
+      (setq llm-ollama-current-response current-response))
     current-response))
 
 (defun llm-ollama--get-final-response (response)
diff --git a/llm-openai.el b/llm-openai.el
index 679f6e783e..a55dadc4a0 100644
--- a/llm-openai.el
+++ b/llm-openai.el
@@ -191,25 +191,44 @@ STREAMING if non-nil, turn on response streaming."
 (defvar-local llm-openai-current-response ""
   "The response so far from the server.")
 
-(defvar-local llm-openai-last-position 1
-  "The last position in the streamed response we read until.")
+(defvar-local llm-openai-last-response 0
+  "The number of the last streaming response we read.
+The responses from OpenAI are not numbered, but we just number
+them from 1 to however many are sent.")
 
 (defun llm-openai--get-partial-chat-response (response)
   "Return the text in the partial chat response from RESPONSE."
   ;; To begin with, we should still be in the buffer with the actual response.
   (let ((current-response llm-openai-current-response)
-        (last-position llm-openai-last-position))
+        (last-response llm-openai-last-response))
     (with-temp-buffer
       (insert response)
-      (goto-char last-position)
-      (when (search-forward "\ndata: {" nil t)
-        (backward-char 2)
-        (ignore-errors
-          (setq current-response
-                (concat current-response (assoc-default 'content (assoc-default 'delta (aref (assoc-default 'choices (json-read)) 0))))))
-        (setq last-position (point))))
-    (setq-local llm-openai-current-response current-response)
-    (setq-local llm-openai-last-position last-position)
+      (let* ((complete-rx (rx (seq "finish_reason\":" (1+ (or ?\[ ?\] alpha)) "}]}" line-end)))
+             (end-pos (save-excursion (goto-char (point-max))
+                                      (when (search-backward-regexp
+                                             complete-rx
+                                             nil t)
+                                        (pos-eol)))))
+        (when end-pos
+          (let ((all-lines (seq-filter
+                            (lambda (line) (string-match-p complete-rx line))
+                            (split-string (buffer-substring-no-properties 1 end-pos) "\n"))))
+            (setq current-response
+                  (concat current-response
+                          (mapconcat (lambda (line)
+                                       (assoc-default 'content
+                                                      (assoc-default
+                                                       'delta
+                                                       (aref (assoc-default
+                                                              'choices
+                                                              (json-read-from-string
+                                                               (replace-regexp-in-string "data: " "" line)))
+                                                             0))))
+                                     (seq-subseq all-lines last-response))))
+            (setq last-response (length all-lines))))))
+    (when (> (length current-response) (length llm-openai-current-response))
+        (setq llm-openai-current-response current-response)
+        (setq llm-openai-last-response last-response))
     current-response))
 
 (cl-defmethod llm-chat-streaming ((provider llm-openai) prompt partial-callback response-callback error-callback)


