guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

01/01: guix: Add filesearch draft.


From: guix-commits
Subject: 01/01: guix: Add filesearch draft.
Date: Thu, 13 Aug 2020 11:50:43 -0400 (EDT)

ambrevar pushed a commit to branch wip-filesearch
in repository guix.

commit 49b52c2c7be03caf3636632c31f4451d5bc88125
Author: Pierre Neidhardt <mail@ambrevar.xyz>
AuthorDate: Tue Aug 4 17:34:44 2020 +0200

    guix: Add filesearch draft.
    
    * guix/scripts/filesearch.scm: New file.
    * guix/scripts/schema.sql: New file.
---
 guix/scripts/filesearch.scm | 222 ++++++++++++++++++++++++++++++++++++++++++++
 guix/scripts/schema.sql     |  15 +++
 2 files changed, 237 insertions(+)

diff --git a/guix/scripts/filesearch.scm b/guix/scripts/filesearch.scm
new file mode 100644
index 0000000..a409dc1
--- /dev/null
+++ b/guix/scripts/filesearch.scm
@@ -0,0 +1,222 @@
+;;; GNU Guix --- Functional package management for GNU
+;;; Copyright © 2020 Pierre Neidhardt <mail@ambrevar.xyz>
+;;;
+;;; This file is part of GNU Guix.
+;;;
+;;; GNU Guix is free software; you can redistribute it and/or modify it
+;;; under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 3 of the License, or (at
+;;; your option) any later version.
+;;;
+;;; GNU Guix is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.
+
+(define-module (guix scripts filesearch)
+  #:use-module (guix config)            ; For %guix-version.
+  #:use-module (sqlite3)
+  #:use-module (guix gexp)              ; For lower-object.
+  #:use-module (guix packages)
+  #:use-module (guix store)
+  #:use-module (guix store database)
+  #:use-module (guix monads)
+  #:use-module (guix grafts)
+  #:use-module (guix records)
+  #:use-module (guix derivations)
+  #:use-module (guix utils)             ; For cache-directory
+  #:use-module (gnu packages)
+  #:use-module (ice-9 match)
+  #:use-module (ice-9 format)
+  #:use-module (ice-9 ftw))
+
+;; TODO: We need to remove package duplicates.
+;; Using "insert or replace ... on conflict (path) do nothing" moves database
+;; generation time from 30s to 100s.
+;;
+;; Remove duplicates afterwards?
+
+;; TODO: Vacuum database?  When?
+;; https://sqlite.org/lang_vacuum.html
+
+(define %db (format #f "~a/files.db" (cache-directory)))
+(define %schema (search-path %load-path "guix/scripts/schema.sql"))
+
+(define-syntax-rule (with-statement db sql stmt exp ...) ; TODO: From (guix 
store database)
+  "Run EXP... with STMT bound to a prepared statement corresponding to the sql
+string SQL for DB."
+  ((@@ (guix store database) call-with-statement) db sql ; TODO: Export?
+   (lambda (stmt) exp ...)))
+
+(define* (add-files db
+                    #:key
+                    (name (error "Missing argument"))
+                    (system (error "Missing argument"))
+                    (output "out")
+                    (path (error "Missing argument"))
+                    (files (error "Missing argument"))
+                    (version (error "Missing argument"))
+                    (guix-version (error "Missing argument")))
+  "FILES is a list of path underneath PATH."
+  (sqlite-exec
+   db
+   (string-append "insert into Packages (name, system, output, path, version, 
guix)"
+                  ;; "insert or replace into Packages (name, system, output, 
path, version, guix)"
+                  (format #f " values (~s, ~s, ~s, ~s, ~s, ~s)"
+                          name system output path version guix-version)
+                  ;; " on conflict (path) do nothing"
+                  ))
+  (let ((id ((@@ (guix store database) last-insert-row-id) db))) ; TODO: 
Export?
+    (for-each
+     (lambda (file)
+       (sqlite-exec
+        db
+        (string-append "insert into Files (subpath, package) "
+                       (format #f " values (~s, ~s)"
+                               file id))))
+     files)))
+
+(define (directory-files path)
+  "Return a list of all files within PATH, recursively.
+Each file is returned as the path relative to PATH, starting with a '/'.
+Empty directories are ignored.
+
+It's important that the first character be the directory separator because it
+gives more expressive power for search.  For instance, searching \"/bin\"
+matches both \"/bin/foo\" and \"/usr/bin/foo\" but not \"barbin\"."
+  (let ((file-list '()))
+    (ftw path
+         (lambda (filename statinfo flag)
+           (when (eq? flag 'regular)
+             (set! file-list (cons (string-drop filename (string-length path))
+                                   file-list))) #t))
+    file-list))
+
+(define-record-type* <package-store-items> package-store-items 
make-package-store-items
+  package-store-items?
+  this-package-store-items
+  (system package-store-items-system)
+  (output-paths package-store-items-output-paths))
+
+(define* (package-store-info package)
+  "Return store items, even if not present locally."
+  (define (lower-object/no-grafts obj system) ; From (guix scripts weather)
+    (mlet* %store-monad ((previous (set-grafting #f))
+                         (drv (lower-object obj system))
+                         (_ (set-grafting previous)))
+      (return drv)))
+  (with-store store
+    (run-with-store store
+      (mlet %store-monad ((drv (lower-object/no-grafts package 
(%current-system))))
+        ;; Note: we don't try building DRV like 'guix archive' does
+        ;; because we don't have to since we can instead rely on
+        ;; substitute meta-data.
+        (return
+         (package-store-items
+          (system (derivation-system drv))
+          (output-paths (derivation->output-paths drv))))))))
+
+(define (persist-package-files db package)
+  (let* ((info (package-store-info package))
+         (system (package-store-items-system info))
+         (output-path-pairs (package-store-items-output-paths info)))
+    (map (match-lambda
+           ((output . path)
+            ;; TODO: Don't list files if entry is already in database.
+            ;; TODO: Try fetching info from remote substitute server database.
+            (when (file-exists? path)
+              (add-files db         ; TODO: Merge this function and add-files?
+                         #:name (package-name package)
+                         #:system system
+                         #:output output
+                         #:path path ; Storing /gnu/store for all packages has 
no significant size cost.
+                         #:version (package-version package)
+                         #:guix-version %guix-version
+                         #:files (directory-files path)))))
+         output-path-pairs)))
+
+(define (search-file-package pattern)
+  "Return corresponding packages.
+Packages or ordered by most relevant last.
+Path is subject to SQLite \"full-text search\" pattern matching.
+See https://www.sqlite.org/fts5.html.
+
+Example patterns:
+
+- \"foo bar\": Both the \"foo\" and \"bar\" full words are in the path.
+- \"bar foo\": Same as above, order does not matter.
+- \"foo*\": Matches any word starting with \"foo\".
+- \"foo OR bar\": Either \"foo\" or \"bar\" full words are in the path."
+  (with-database %db db
+    (with-statement
+        db
+        ;; REVIEW: Is this inner join cheap?
+        (string-append
+         "select subpath, name, version, output"
+         " from Files inner join Packages on Files.package = Packages.id"
+         (format #f " where Files.subpath match '~a' order by rank" pattern))
+      stmt
+      (map vector->list
+           (sqlite-fold cons '() stmt)))))
+
+(define (format-search search-result)
+  (for-each
+   (match-lambda
+     ((subpath name version output)
+      (format #t "~a:~a@~a~/~a~%"
+              name output version subpath)))
+   search-result))
+
+(define (persist-all-local-packages)
+  "Return number of persisted packages."
+  (parameterize ((sql-schema %schema))
+    (with-database %db db
+      ;; It's important to persist all entries in a single transaction to
+      ;; avoid a performance bottleneck.  See
+      ;; https://www.sqlite.org/fts5.html.
+      ((@@ (guix store database) call-with-transaction) ; TODO: Export?
+       db
+       (lambda ()
+         (fold-packages
+          (lambda (package count)
+            (persist-package-files db package)
+            (+ 1 count))
+          1))))))
+
+
+(define (test-missing-package)
+  (package-store-info
+   (@@ (gnu packages chromium) ungoogled-chromium)))
+
+(define (test-index-git)
+  (parameterize ((sql-schema %schema))
+    (with-database %db db
+      (persist-package-files db (@@ (gnu packages version-control) git)))))
+
+(define (test-search)
+  (test-index-git)
+  (format-search (search-file-package "git perl5")))
+
+;; TODO: Catch case we don't have a derivation.
+
+;; TODO: Sync databases with substitute server: SQLite diffs?  Binary diff
+;; with xdelta (probably not since it would send entries for Guix versions
+;; that the user does not have).
+
+;; Statistics
+;;
+;; Context:
+;; - 14,000 packages
+;; - 1700 store items
+;; - CPU 3.5 GHz
+;; - SSD
+;;
+;; Results:
+;; - Database generation time: 30 seconds.
+;; - Database size: 31 MiB.
+;; - Database Zstd-compressed size: 6.1 MiB.
+;; - Zstd-compression time: 0.13 seconds.
+;; - FTS queries: < 0.01 seconds.
diff --git a/guix/scripts/schema.sql b/guix/scripts/schema.sql
new file mode 100644
index 0000000..ce90de2
--- /dev/null
+++ b/guix/scripts/schema.sql
@@ -0,0 +1,15 @@
+create table if not exists Packages (
+    id integer primary key autoincrement not null,
+    name        text not null,
+    output      text default "out",
+    system      text not null,
+    path        text not null, -- store path, e.g. /gnu/store/abcd...-foo
+    -- path        text unique not null, -- TODO: Make unique?  Maybe to slow.
+    version     text not null,
+    guix        text not null
+);
+
+create virtual table if not exists Files using fts5(
+    subpath,
+    package
+);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]