[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/elisa 3eff22d4b6 53/98: Use new railways for info manuals
From: ELPA Syncer
Subject: [elpa] externals/elisa 3eff22d4b6 53/98: Use new railways for info manuals
Date: Wed, 17 Jul 2024 18:58:04 -0400 (EDT)
branch: externals/elisa
commit 3eff22d4b63171cbbc2eb86fe15685d958836bd3
Author: Sergey Kostyaev <kostyaev.sergey2@wb.ru>
Commit: Sergey Kostyaev <kostyaev.sergey2@wb.ru>
Use new railways for info manuals
---
elisa.el | 179 +++++++++++++++++++++++++++++++++------------------------------
1 file changed, 95 insertions(+), 84 deletions(-)
diff --git a/elisa.el b/elisa.el
index d78fa71184..09139a4de6 100644
--- a/elisa.el
+++ b/elisa.el
@@ -225,8 +225,7 @@ If set, all quotes with similarity less than threshold will
be filtered out."
(defun elisa-embeddings-create-table-sql ()
"Generate sql for create embeddings table."
- (format "create virtual table if not exists elisa_embeddings using
vss0(embedding(%d));"
- (elisa-get-embedding-size)))
+ "drop table if exists elisa_embeddings;")
(defun elisa-data-embeddings-create-table-sql ()
"Generate sql for create data embeddings table."
@@ -239,7 +238,7 @@ If set, all quotes with similarity less than threshold will
be filtered out."
(defun elisa-info-create-table-sql ()
"Generate sql for create info table."
- "create table if not exists info (node text unique);")
+ "drop table if exists info;")
(defun elisa-collections-create-table-sql ()
"Generate sql for create collections table."
@@ -309,6 +308,14 @@ FOREIGN KEY(collection_id) REFERENCES collections(rowid)
"(%s)"
(string-join (mapcar (lambda (id) (format "%d" id)) ids) ", ")))
+(defun elisa-sqlite-format-string-list (names)
+ "Convert list of string NAMES list to sqlite list representation."
+ (format
+ "(%s)"
+ (string-join (mapcar (lambda (name)
+ (format "'%s'"
+ (elisa-sqlite-escape name))) names) ", ")))
+
(defun elisa-avg (lst)
"Calculate arithmetic average value of LST."
(let ((len (length lst))
@@ -394,17 +401,84 @@ FOREIGN KEY(collection_id) REFERENCES collections(rowid)
(error
(setq continue nil))))))))
-(defun elisa-find-similar (text)
- "Find similar to TEXT results."
- (let ((embedding (llm-embedding elisa-embeddings-provider text)))
- (flatten-tree
+(defun elisa-find-similar (text collections)
+ "Find similar to TEXT results in COLLECTIONS."
+ (message "searching in collected data")
+ (let* ((rowids (mapcar
+ #'car
+ (sqlite-select
+ elisa-db
+ (format "select rowid from data where collection_id in
+ (
+SELECT rowid FROM collections WHERE name IN %s
+);" (elisa-sqlite-format-string-list collections)))))
+ (query (format "WITH
+vector_search AS (
+ SELECT rowid, distance
+ FROM data_embeddings
+ WHERE vss_search(embedding, %s)
+ ORDER BY distance ASC
+ LIMIT 40
+),
+semantic_search AS (
+ SELECT rowid, RANK () OVER (ORDER BY distance ASC) AS rank
+ FROM vector_search
+ WHERE rowid IN %s
+ ORDER BY distance ASC
+ LIMIT 20
+),
+keyword_search AS (
+ SELECT rowid, RANK () OVER (ORDER BY bm25(data_fts) ASC) AS rank
+ FROM data_fts
+ WHERE rowid in %s and data_fts MATCH '%s'
+ ORDER BY bm25(data_fts) ASC
+ LIMIT 20
+),
+hybrid_search AS (
+SELECT
+ COALESCE(semantic_search.rowid, keyword_search.rowid) AS rowid,
+ COALESCE(1.0 / (60 + semantic_search.rank), 0.0) +
+ COALESCE(1.0 / (60 + keyword_search.rank), 0.0) AS score
+FROM semantic_search
+FULL OUTER JOIN keyword_search ON semantic_search.rowid = keyword_search.rowid
+ORDER BY score DESC
+LIMIT %d
+)
+SELECT
+ hybrid_search.rowid
+FROM hybrid_search
+;
+"
+ (elisa-vector-to-sqlite
+ (llm-embedding elisa-embeddings-provider text))
+ (elisa-sqlite-format-int-list rowids)
+ (elisa-sqlite-format-int-list rowids)
+ (elisa-fts-query text)
+ (elisa-get-limit)))
+ (raw-ids (mapcar #'car (sqlite-select elisa-db query)))
+ (ids (if elisa-reranker-enabled
+ (elisa-rerank text raw-ids)
+ (take elisa-limit raw-ids))))
+ (mapc
+ (lambda (row)
+ (when-let ((kind (cl-first row))
+ (path (cl-second row))
+ (text (cl-third row)))
+ (pcase kind
+ ("web"
+ (ellama-context-add-webpage-quote-noninteractive path path text))
+ ("file"
+ (ellama-context-add-file-quote-noninteractive path text))
+ ("info"
+ (ellama-context-add-info-node-quote-noninteractive path text)))))
(sqlite-select
elisa-db
(format
- "select * from info where rowid in
-(select rowid from elisa_embeddings where vss_search(embedding,%s) limit %d);"
- (elisa-vector-to-sqlite embedding)
- elisa-limit)))))
+ "SELECT k.name, d.path, d.data
+FROM data AS d
+LEFT JOIN kinds k ON k.rowid = d.kind_id
+WHERE d.rowid in %s;"
+ (elisa-sqlite-format-int-list ids))))))
(defun elisa--split-by (func)
"Split buffer content to list by FUNC."
@@ -695,57 +769,7 @@ Return sqlite query that extract data for adding to
context."
rowid (elisa-sqlite-escape chunk))))))
(elisa-extact-webpage-chunks url))
(cl-incf collected-pages)))
- urls)
- (message "searching in collected data")
- (let* ((rowids (mapcar
- #'car
- (sqlite-select
- elisa-db
- (format "select rowid from data where collection_id = %s;"
collection-id))))
- (query (format "WITH
-vector_search AS (
- SELECT rowid, distance
- FROM data_embeddings
- WHERE vss_search(embedding, %s)
- ORDER BY distance ASC
- LIMIT 40
-),
-semantic_search AS (
- SELECT rowid, RANK () OVER (ORDER BY distance ASC) AS rank
- FROM vector_search
- WHERE rowid IN %s
- ORDER BY distance ASC
- LIMIT 20
-),
-keyword_search AS (
- SELECT rowid, RANK () OVER (ORDER BY bm25(data_fts) ASC) AS rank
- FROM data_fts
- WHERE rowid in %s and data_fts MATCH '%s'
- ORDER BY bm25(data_fts) ASC
- LIMIT 20
-),
-hybrid_search AS (
-SELECT
- COALESCE(semantic_search.rowid, keyword_search.rowid) AS rowid,
- COALESCE(1.0 / (60 + semantic_search.rank), 0.0) +
- COALESCE(1.0 / (60 + keyword_search.rank), 0.0) AS score
-FROM semantic_search
-FULL OUTER JOIN keyword_search ON semantic_search.rowid = keyword_search.rowid
-ORDER BY score DESC
-LIMIT %d
-)
-SELECT
- hybrid_search.rowid
-FROM hybrid_search
-;
-"
- (elisa-vector-to-sqlite
- (llm-embedding elisa-embeddings-provider prompt))
- (elisa-sqlite-format-int-list rowids)
- (elisa-sqlite-format-int-list rowids)
- (elisa-fts-query prompt)
- (elisa-get-limit))))
- query)))
+ urls)))
;;;###autoload
(defun elisa-web-search (prompt)
@@ -753,8 +777,9 @@ FROM hybrid_search
(interactive "sAsk elisa with web search: ")
(message "searching the web")
(elisa--async-do (lambda () (elisa--web-search prompt))
- (lambda (query)
- (elisa-retrieve-ask query prompt))))
+ (lambda (_)
+ (elisa-find-similar prompt (list prompt))
+ (ellama-chat prompt nil :provider elisa-chat-provider))))
(defun elisa-retrieve-ask (query prompt)
"Retrieve data with QUERY and ask elisa for PROMPT."
@@ -886,27 +911,13 @@ Call ON-DONE callback with result as an argument after
FUNC evaluation done."
(elisa--async-do 'elisa-parse-all-manuals))
;;;###autoload
-(defun elisa-chat (prompt)
- "Send PROMPT to elisa."
+(defun elisa-chat (prompt &optional collections)
+ "Send PROMPT to elisa.
+Find similar quotes in COLLECTIONS and add it to context."
(interactive "sAsk elisa: ")
- (if (and elisa-prompt-rewriting-enabled ellama--current-session-id)
- (ellama-chain
- prompt
- `((:provider elisa-chat-provider
- :session ,(with-current-buffer (ellama-get-session-buffer
ellama--current-session-id)
- ellama--current-session)
- :transform (lambda (s)
- (format elisa-rewrite-prompt-template s)))
- (:provider elisa-chat-provider
- :transform (lambda (s)
- (let ((unquoted (string-trim s "[ \t\n\r\"]+"
"[ \t\n\r\"]+"))
- (infos (elisa-find-similar unquoted)))
- (mapc #'ellama-context-add-info-node infos)
- ,prompt))
- :chat t)))
- (let ((infos (elisa-find-similar prompt)))
- (mapc #'ellama-context-add-info-node infos)
- (ellama-chat prompt nil :provider elisa-chat-provider))))
+ (let ((cols (or collections '("builtin manuals" "external manuals"))))
+ (elisa-find-similar prompt cols)
+ (ellama-chat prompt nil :provider elisa-chat-provider)))
(provide 'elisa)
;;; elisa.el ends here.
- [elpa] externals/elisa f744ce305a 67/98: Add reparse current collection command, (continued)
- [elpa] externals/elisa f744ce305a 67/98: Add reparse current collection command, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 9ad7827337 70/98: Fix semantic split with single chunk, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 439ed1d4f8 76/98: Make executable customization simpler, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa e5691f59c5 80/98: Make syncronous functions non-interactive, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa e92628390b 82/98: Update example configuration, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 39915439a4 84/98: Update installation instructions, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa fbfe3b4ae1 86/98: Merge pull request #12 from s-kostyaev/semantic-split, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 3882b9b322 87/98: Bump version, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 04d580f072 92/98: add vector- and vss-path to injected variables on async, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 1acc89545d 31/98: Merge pull request #13 from s-kostyaev/fix-builtin-manuals-parsing, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 3eff22d4b6 53/98: Use new railways for info manuals, ELPA Syncer <=
- [elpa] externals/elisa 503083c0fb 58/98: Truncate long lines in done message, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 25e0df1dca 65/98: Create customization group, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 14af9ae960 66/98: Improve collection management, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 7460059992 85/98: Update CI, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa cdf3ece6b4 93/98: Merge pull request #19 from dabi/patch-1, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 3372452de2 94/98: Bump version, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 7cbb3bfc13 97/98: Merge pull request #18 from s-kostyaev/gnu-elpa-release, ELPA Syncer, 2024/07/17