guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

03/05: Store the distribution of derivations related to packages


From: Christopher Baines
Subject: 03/05: Store the distribution of derivations related to packages
Date: Thu, 9 Mar 2023 04:18:19 -0500 (EST)

cbaines pushed a commit to branch master
in repository data-service.

commit e39c9da028a9a29a0212cdb0287b0046bb786c32
Author: Christopher Baines <mail@cbaines.net>
AuthorDate: Thu Mar 9 08:29:39 2023 +0000

    Store the distribution of derivations related to packages
    
    This might be generally useful, but I've been looking at it as it offers a 
way
    to try and improve query performance when you want to select all the
    derivations related to the packages for a revision.
    
    The data looks like this (for a specified system and target):
    
    ┌───────┬───────┐
    │ level │ count │
    ├───────┼───────┤
    │    15 │     2 │
    │    14 │     3 │
    │    13 │     3 │
    │    12 │     3 │
    │    11 │    14 │
    │    10 │    25 │
    │     9 │    44 │
    │     8 │    91 │
    │     7 │  1084 │
    │     6 │   311 │
    │     5 │   432 │
    │     4 │   515 │
    │     3 │   548 │
    │     2 │  2201 │
    │     1 │ 21162 │
    │     0 │ 22310 │
    └───────┴───────┘
    
    Level 0 reflects the number of packages. Level 1 is similar as you have all
    the derivations for the package origins. The remaining levels contain less
    packages since it's mostly just derivations involved in bootstrapping.
    
    When using a recursive CTE to collect all the derivations, PostgreSQL 
assumes
    that the each derivation has the same number of inputs, and this leads to a
    large overestimation of the number of derivations per a revision. This in 
turn
    can lead to PostgreSQL picking a slower way of running the query.
    
    When it's known how many new derivations you should see at each level, it's
    possible to inform PostgreSQL this by using LIMIT's at various points in the
    query. This reassures the query planner that it's not going to be handling
    lots of rows and helps it make better decisions about how to execute the
    query.
---
 guix-data-service/jobs/load-new-guix-revision.scm  |   8 +-
 .../model/guix-revision-package-derivation.scm     | 257 ++++++++++++++++++++-
 scripts/guix-data-service.in                       |   8 +
 ...sion_package_derivation_distribution_counts.sql |  13 ++
 ...sion_package_derivation_distribution_counts.sql |   7 +
 sqitch/sqitch.plan                                 |   1 +
 ...sion_package_derivation_distribution_counts.sql |   7 +
 7 files changed, 299 insertions(+), 2 deletions(-)

diff --git a/guix-data-service/jobs/load-new-guix-revision.scm 
b/guix-data-service/jobs/load-new-guix-revision.scm
index 7eb4425..7c9a772 100644
--- a/guix-data-service/jobs/load-new-guix-revision.scm
+++ b/guix-data-service/jobs/load-new-guix-revision.scm
@@ -1595,7 +1595,13 @@ WHERE job_id = $1")
                                package-derivation-ids)
               (simple-format
                #t "Successfully loaded ~A package/derivation pairs\n"
-               ids-count))))
+               ids-count))
+
+            (with-time-logging
+                "insert-guix-revision-package-derivation-distribution-counts"
+              (insert-guix-revision-package-derivation-distribution-counts
+               conn
+               guix-revision-id))))
         #t)
       (lambda (key . args)
         (simple-format (current-error-port)
diff --git a/guix-data-service/model/guix-revision-package-derivation.scm 
b/guix-data-service/model/guix-revision-package-derivation.scm
index 5095b42..cb73114 100644
--- a/guix-data-service/model/guix-revision-package-derivation.scm
+++ b/guix-data-service/model/guix-revision-package-derivation.scm
@@ -16,8 +16,16 @@
 ;;; <http://www.gnu.org/licenses/>.
 
 (define-module (guix-data-service model guix-revision-package-derivation)
+  #:use-module (ice-9 match)
+  #:use-module (ice-9 threads)
   #:use-module (squee)
-  #:export (insert-guix-revision-package-derivations))
+  #:use-module (guix-data-service database)
+  #:export (insert-guix-revision-package-derivations
+
+            insert-guix-revision-package-derivation-distribution-counts
+            backfill-guix-revision-package-derivation-distribution-counts
+
+            get-sql-to-select-package-and-related-derivations-for-revision))
 
 (define (insert-guix-revision-package-derivations
          conn guix-revision-id package-derivation-ids)
@@ -35,3 +43,250 @@
                    ";"))
 
   (exec-query conn insert))
+
+(define (insert-guix-revision-package-derivation-distribution-counts
+         conn
+         guix-revision-id)
+  (define system-ids-and-targets
+    (exec-query
+     conn
+     "
+SELECT DISTINCT system_id, target
+FROM package_derivations
+INNER JOIN guix_revision_package_derivations
+  ON package_derivations.id = 
guix_revision_package_derivations.package_derivation_id
+WHERE revision_id = $1"
+     (list guix-revision-id)))
+
+  (define (get-count-for-next-level system target level-counts)
+    (define next-level
+      (length level-counts))
+
+    (define query
+      (string-append
+       (simple-format
+        #f
+        "
+WITH l0 AS (
+  SELECT derivation_id
+  FROM package_derivations
+  INNER JOIN guix_revision_package_derivations
+    ON package_derivations.id
+        = guix_revision_package_derivations.package_derivation_id
+  WHERE revision_id = 19411
+    AND system_id = 4
+    AND target = $STR$$STR$~A
+)"
+        (if (= next-level 0)
+            ""
+            (simple-format
+             #f
+             "
+  LIMIT ~A"
+             (car level-counts))))
+       (if (= next-level 0)
+           ""
+           (string-join
+            (map
+             (match-lambda*
+               ((level count)
+                (simple-format
+                 #f
+                 ", l~A AS (
+  (
+    SELECT derivation_outputs.derivation_id
+    FROM derivation_outputs WHERE derivation_outputs.id IN (
+      SELECT DISTINCT derivation_inputs.derivation_output_id
+      FROM l~A
+      INNER JOIN derivation_inputs
+        ON l~A.derivation_id = derivation_inputs.derivation_id
+    )
+  ) EXCEPT (~A
+  )~A
+)"
+                 level
+                 (- level 1)
+                 (- level 1)
+                 (string-join
+                  (map
+                   (lambda (level)
+                     (simple-format
+                      #f
+                      "      SELECT derivation_id FROM l~A"
+                      level))
+                   (iota level))
+                  "
+    UNION ALL")
+                 (if count
+                     (simple-format
+                      #f
+                      "
+  LIMIT ~A"
+                      count)
+                     ""))))
+             (iota (length level-counts) 1)
+             (append (cdr level-counts) '(#f)))
+            ""))
+       (simple-format
+        #f
+        "
+SELECT COUNT(*) FROM l~A"
+        (length level-counts))))
+
+    (string->number
+     (caar
+      (exec-query
+       conn
+       query))))
+
+  (define (insert-level-count system-id target level count)
+    (exec-query
+     conn
+     "
+INSERT INTO guix_revision_package_derivation_distribution_counts
+VALUES ($1, $2, $3, $4, $5)"
+     (list guix-revision-id
+           system-id
+           target
+           (number->string level)
+           (number->string count))))
+
+  (for-each
+   (match-lambda
+     ((system-id target)
+
+      (let loop ((level-counts '()))
+        (let ((level (length level-counts))
+              (count (get-count-for-next-level system-id target level-counts)))
+          (unless (= count 0)
+            (insert-level-count system-id target level count)
+            (loop (append level-counts (list count))))))))
+   system-ids-and-targets))
+
+(define (backfill-guix-revision-package-derivation-distribution-counts)
+  (define revision-ids
+    (with-thread-postgresql-connection
+     (lambda (conn)
+       (map
+        car
+        (exec-query
+         conn
+         "
+  SELECT id
+  FROM guix_revisions
+EXCEPT
+  SELECT guix_revision_id
+  FROM guix_revision_package_derivation_distribution_counts
+ORDER BY id DESC")))))
+
+  (n-par-for-each
+   4
+   (lambda (revision-id)
+     (simple-format #t "backfilling 
guix_revision_package_derivation_distribution_counts for revision ~A\n" 
revision-id)
+     (with-thread-postgresql-connection
+      (lambda (conn)
+        (with-postgresql-transaction
+         conn
+         (lambda (conn)
+           (insert-guix-revision-package-derivation-distribution-counts
+            conn
+            revision-id))))))
+   revision-ids))
+
+(define* (get-sql-to-select-package-and-related-derivations-for-revision
+          conn
+          guix-revision-id
+          #:key system-id target)
+  (define level-counts
+    (map
+     (match-lambda
+       ((level count)
+        (list
+         (string->number level)
+         (string->number count))))
+     (exec-query
+      conn
+      "
+SELECT level, distinct_derivations
+FROM guix_revision_package_derivation_distribution_counts
+WHERE guix_revision_id = $1
+  AND system_id = $2
+  AND target = $3
+ORDER BY level ASC"
+      (list guix-revision-id
+            (number->string system-id)
+            target))))
+
+  (define (query level-counts)
+    (string-append
+     (simple-format
+      #f
+      "
+WITH l0 AS (
+  SELECT derivation_id
+  FROM package_derivations
+  INNER JOIN guix_revision_package_derivations
+    ON package_derivations.id
+        = guix_revision_package_derivations.package_derivation_id
+  WHERE revision_id = ~A
+    AND system_id = ~A
+    AND target = $STR$~A$STR$
+  LIMIT ~A
+)"
+      guix-revision-id
+      system-id
+      target
+      (cdr (car level-counts)))
+     (string-join
+      (map
+       (match-lambda*
+         ((level count)
+          (simple-format
+           #f
+           ", l~A AS (
+  (
+    SELECT derivation_outputs.derivation_id
+    FROM derivation_outputs WHERE derivation_outputs.id IN (
+      SELECT DISTINCT derivation_inputs.derivation_output_id
+      FROM l~A
+      INNER JOIN derivation_inputs
+        ON l~A.derivation_id = derivation_inputs.derivation_id
+    )
+  ) EXCEPT (~A
+  )~A
+)"
+           level
+           (- level 1)
+           (- level 1)
+           (string-join
+            (map
+             (lambda (level)
+               (simple-format
+                #f
+                "      SELECT derivation_id FROM l~A"
+                level))
+             (iota level))
+            "
+    UNION ALL")
+           (simple-format
+            #f
+            "
+  LIMIT ~A"
+            count))))
+       (iota (- (length level-counts) 1) 1)
+       (cdr (map cdr level-counts)))
+      "")
+     ", all_derivations AS (
+  SELECT *
+  FROM l0"
+     (string-join
+      (map (lambda (level)
+             (simple-format #f "  UNION (SELECT * FROM l~A)" level))
+           (iota (- (length level-counts) 1) 1))
+      "\n")
+     "
+)"))
+
+  (if level-counts
+      (query level-counts)
+      #f))
diff --git a/scripts/guix-data-service.in b/scripts/guix-data-service.in
index 7653562..d8b9b5a 100644
--- a/scripts/guix-data-service.in
+++ b/scripts/guix-data-service.in
@@ -37,6 +37,7 @@
              (guix-data-service config)
              (guix-data-service database)
              (guix-data-service substitutes)
+             (guix-data-service model guix-revision-package-derivation)
              (guix-data-service web server)
              (guix-data-service web controller)
              (guix-data-service web nar controller))
@@ -217,6 +218,13 @@
 
            (pid-file (assq-ref opts 'pid-file)))
 
+      (call-with-new-thread
+       (lambda ()
+         (with-postgresql-connection-per-thread
+          "backfill"
+          (lambda ()
+            (backfill-guix-revision-package-derivation-distribution-counts)))))
+
       (when pid-file
         (call-with-output-file pid-file
           (lambda (port)
diff --git 
a/sqitch/deploy/guix_revision_package_derivation_distribution_counts.sql 
b/sqitch/deploy/guix_revision_package_derivation_distribution_counts.sql
new file mode 100644
index 0000000..58829c5
--- /dev/null
+++ b/sqitch/deploy/guix_revision_package_derivation_distribution_counts.sql
@@ -0,0 +1,13 @@
+-- Deploy 
guix-data-service:guix_revision_package_derivation_distribution_counts to pg
+
+BEGIN;
+
+CREATE TABLE guix_revision_package_derivation_distribution_counts (
+  guix_revision_id integer NOT NULL REFERENCES guix_revisions (id),
+  system_id integer NOT NULL REFERENCES systems (id),
+  target varchar NOT NULL,
+  level integer NOT NULL,
+  distinct_derivations integer NOT NULL
+);
+
+COMMIT;
diff --git 
a/sqitch/revert/guix_revision_package_derivation_distribution_counts.sql 
b/sqitch/revert/guix_revision_package_derivation_distribution_counts.sql
new file mode 100644
index 0000000..7956b30
--- /dev/null
+++ b/sqitch/revert/guix_revision_package_derivation_distribution_counts.sql
@@ -0,0 +1,7 @@
+-- Revert 
guix-data-service:guix_revision_package_derivation_distribution_counts from pg
+
+BEGIN;
+
+-- XXX Add DDLs here.
+
+COMMIT;
diff --git a/sqitch/sqitch.plan b/sqitch/sqitch.plan
index 64b2189..caab662 100644
--- a/sqitch/sqitch.plan
+++ b/sqitch/sqitch.plan
@@ -93,3 +93,4 @@ blocked_builds 2022-11-07T11:27:28Z Chris <chris@felis> # Add 
blocked_builds
 package_derivations_extended_statistics 2022-11-12T10:40:18Z Chris 
<chris@felis> # Add extended statistics on package_derivations
 derivation_outputs_id_and_derivation_id_idx 2022-11-12T10:41:42Z Chris 
<chris@felis> # Add index on derivation_outputs id and derivation_id
 blocked_builds_blocked_builds_blocked_derivation_output_details_set_id_2 
2023-03-05T10:19:53Z Chris <chris@felis> # Add index on 
blocked_builds_blocked_derivation_output_details_set_id
+guix_revision_package_derivation_distribution_counts 2023-03-08T16:53:44Z 
Chris <chris@felis> # Add guix_revision_package_derivation_distribution_counts 
table
diff --git 
a/sqitch/verify/guix_revision_package_derivation_distribution_counts.sql 
b/sqitch/verify/guix_revision_package_derivation_distribution_counts.sql
new file mode 100644
index 0000000..1f7edd2
--- /dev/null
+++ b/sqitch/verify/guix_revision_package_derivation_distribution_counts.sql
@@ -0,0 +1,7 @@
+-- Verify 
guix-data-service:guix_revision_package_derivation_distribution_counts on pg
+
+BEGIN;
+
+-- XXX Add verifications here.
+
+ROLLBACK;



reply via email to

[Prev in Thread] Current Thread [Next in Thread]