guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[no subject]


From: Ludovic Courtès
Date: Thu, 9 Jan 2025 06:46:54 -0500 (EST)

branch: wip-event-logging
commit d81fa497bfd3690f061846e06bf1d9c0e43b1865
Author: Ludovic Courtès <ludo@gnu.org>
AuthorDate: Thu Jan 9 12:39:53 2025 +0100

    base: ‘remote-builder’ listens for notifications from ‘cuirass 
remote-server’.
    
    * src/cuirass/base.scm (%remote-server-socket-file-name): New variable.
    (remote-builder-listener): New procedure.
    (remote-builder): Add ‘socket’ parameter.  Call
    ‘remote-builder-listener’.  Handle ‘build-status-change’ messages.
    (spawn-remote-builder): Add call to ‘open-unix-listening-socket’ and
    pass it to ‘remote-builder’.
    * src/cuirass/scripts/remote-server.scm (%notification-socket-pool): New
    variable.
    (send-build-status-change-notification, update-build-status): New
    procedures.
    (run-fetch, serve-build-requests): Call ‘update-build-status’ instead of
    ‘db-update-build-status!’ and ‘set-build-successful!’.
    (open-build-notification-socket): New procedure.
    (cuirass-remote-server): Use it and parameterize 
‘%notification-socket-pool’.
    * tests/remote.scm (notification-server): New variable.
    (terminate-process): New procedure.
    (stop-worker, stop-server): Use it.
    (start-notification-server, stop-notification-server): New procedures.
    ("remote-server"): Call ‘start-notification-server’.
    ("clean-up"): Call ‘stop-notification-server’.
---
 src/cuirass/base.scm                  | 53 +++++++++++++++++--
 src/cuirass/scripts/remote-server.scm | 98 ++++++++++++++++++++++++-----------
 tests/remote.scm                      | 44 +++++++++++++---
 3 files changed, 152 insertions(+), 43 deletions(-)

diff --git a/src/cuirass/base.scm b/src/cuirass/base.scm
index 3bc9796..c5d7eb2 100644
--- a/src/cuirass/base.scm
+++ b/src/cuirass/base.scm
@@ -89,6 +89,7 @@
 
             ;; Parameters.
             %bridge-socket-file-name
+            %remote-server-socket-file-name
             %package-cachedir
             %fallback?))
 
@@ -165,9 +166,17 @@
                       (string-append (%cuirass-state-directory) "/run"))))
 
 (define %bridge-socket-file-name
+  ;; Socket 'cuirass register' listens to, allowing 'cuirass web' to connect
+  ;; to it.
   (make-parameter (string-append (%cuirass-run-state-directory)
                                  "/cuirass/bridge")))
 
+(define %remote-server-socket-file-name
+  ;; Socket 'cuirass register' listens to, allowing 'cuirass remote-server' to
+  ;; connect to it.
+  (make-parameter (string-append (%cuirass-run-state-directory)
+                                 "/cuirass/remote-builds")))
+
 
 ;;;
 ;;; Read parameters.
@@ -446,22 +455,56 @@ by handing them to the local build daemon."
     (spawn-fiber (local-builder channel))
     channel))
 
-(define (remote-builder channel)
+(define (remote-builder-listener socket channel)
+  "Spawn a server that accepts connections on SOCKET and forwards messages it
+reads to CHANNEL."
+  (define (serve-client client)
+    (let loop ()
+      (match (read client)
+        ((? eof-object?)
+         (log-info "terminating remote server client connection on EOF"))
+        (message
+         (put-message channel message)
+         (loop)))))
+
+  (lambda ()
+    (let loop ()
+      (match (accept socket (logior SOCK_NONBLOCK SOCK_CLOEXEC))
+        ((connection . peer)
+         (spawn-fiber (lambda ()
+                        (log-info "remote builder accepted connection: ~s"
+                                  connection)
+                        (serve-client connection)))
+         (loop))))))
+
+(define (remote-builder channel socket)
+  "Spawn a remote builder that accepts messages on CHANNEL and receives
+notifications from 'cuirass remote-server' over SOCKET."
   (lambda ()
     (log-info "builds will be delegated to 'cuirass remote-server'")
+    (spawn-fiber (remote-builder-listener socket channel))
+
     (let loop ()
       (match (get-message channel)
         (`(build ,derivations)
          ;; Currently there's nothing to do here: 'cuirass remote-server'
          ;; periodically calls 'db-get-pending-build'.
          ;; TODO: Push notifications to 'remote-server' instead.
-         (log-info "~a pending derivation builds" (length derivations))))
+         (log-info "~a pending derivation builds" (length derivations)))
+        (`(build-status-change ,derivation ,status)
+         ;; TODO: Handle database operations, notifications, etc. from here.
+         (log-info "status of '~a' changed to ~a" derivation status)))
       (loop))))
 
 (define (spawn-remote-builder)
-  "Spawn a build actor that performs builds using \"remote workers\"."
-  (let ((channel (make-channel)))
-    (spawn-fiber (remote-builder channel))
+  "Spawn a build actor that performs builds using \"remote workers\".  Return
+once ready to listen for incoming connections from 'cuirass remote-server'."
+  (log-info "listening for 'cuirass remote-server' notifications on '~a'"
+            (%remote-server-socket-file-name))
+  (let ((channel (make-channel))
+        (socket (open-unix-listening-socket
+                 (%remote-server-socket-file-name))))
+    (spawn-fiber (remote-builder channel socket))
     channel))
 
 
diff --git a/src/cuirass/scripts/remote-server.scm 
b/src/cuirass/scripts/remote-server.scm
index caeb8ed..ec4f55f 100644
--- a/src/cuirass/scripts/remote-server.scm
+++ b/src/cuirass/scripts/remote-server.scm
@@ -1,6 +1,6 @@
 ;;; remote-server.scm -- Remote build server.
 ;;; Copyright © 2020, 2021 Mathieu Othacehe <othacehe@gnu.org>
-;;; Copyright © 2023, 2024 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2023-2025 Ludovic Courtès <ludo@gnu.org>
 ;;;
 ;;; This file is part of Cuirass.
 ;;;
@@ -20,7 +20,8 @@
 (define-module (cuirass scripts remote-server)
   #:autoload   (cuirass base) (read-parameters
                                set-build-successful!
-                               spawn-build-maintainer)
+                               spawn-build-maintainer
+                               %remote-server-socket-file-name)
   #:use-module (cuirass config)
   #:use-module (cuirass database)
   #:use-module (cuirass logging)
@@ -87,6 +88,10 @@
 (define %publish-port
   (make-parameter #f))
 
+(define %notification-socket-pool
+  ;; Pool of sockets connected to 'cuirass register'.
+  (make-parameter #f))
+
 (define service-name
   "Cuirass remote server")
 
@@ -281,6 +286,22 @@ signing key for URL is not authorized."
                         #t))))
              outputs))))
 
+(define (send-build-status-change-notification drv status)
+  "Send a notification to 'cuirass register' that the status of DRV changed to
+STATUS."
+  (with-resource-from-pool (%notification-socket-pool) sock
+    (write `(build-status-change ,drv ,status) sock)
+    (newline sock)))
+
+(define* (update-build-status drv status
+                              #:key log-file)
+  "Change the status of DRV to STATUS, both in the database and by sending a
+notification to 'cuirass register'."
+  (if (= (build-status succeeded) status)
+      (set-build-successful! drv)
+      (db-update-build-status! drv status #:log-file log-file))
+  (send-build-status-change-notification drv status))
+
 (define* (run-fetch message)
   "Read MESSAGE and download the corresponding build outputs.  If
 %CACHE-DIRECTORY is set, download the matching NAR and NARINFO files in this
@@ -304,15 +325,15 @@ directory."
              (add-to-store drv outputs url))
            (begin
              (log-info "build succeeded: '~a'" drv)
-             (set-build-successful! drv))
+             (update-build-status drv (build-status succeeded)))
            (begin
              (log-error "failed to retrieve output of \
 successful build '~a'; rescheduling"
                         drv)
-             (db-update-build-status! drv (build-status scheduled))))))
+             (update-build-status drv (build-status scheduled))))))
     (('build-failed ('drv drv) ('url url) _ ...)
      (log-info "build failed: '~a'" drv)
-     (db-update-build-status! drv (build-status failed)))))
+     (update-build-status drv (build-status failed)))))
 
 (define (fetch-worker channel max-parallel-downloads)
   (define queue-size
@@ -522,7 +543,7 @@ Use WORKER-DIRECTORY to maintain the list of active 
workers."
                                        ('dependency dependency) _ ...)
              (log-info "build failed: dependency '~a' of '~a'"
                        dependency drv)
-             (db-update-build-status! drv (build-status failed-dependency))
+             (update-build-status drv (build-status failed-dependency))
              (put-message build-maintainer 'failed-dependency) ;mark 
'failed-dependency' builds
              (let ((parent (db-get-build drv)))
                (when parent
@@ -560,7 +581,7 @@ Use WORKER-DIRECTORY to maintain the list of active 
workers."
                                     (build-id build)
                                     derivation (build-system build)))
                        (db-update-build-worker! derivation name)
-                       (db-update-build-status! derivation (build-status 
submitted))
+                       (update-build-status derivation (build-status 
submitted))
                        (catch 'zmq-error
                          (lambda ()
                            (reply-worker
@@ -576,7 +597,7 @@ Use WORKER-DIRECTORY to maintain the list of active 
workers."
                                       (worker-name worker)
                                       (worker-address worker)
                                       message)
-                           (db-update-build-status! derivation
+                           (update-build-status derivation
                                                     (build-status 
scheduled)))))
                      (begin
                        (when worker
@@ -598,8 +619,8 @@ Use WORKER-DIRECTORY to maintain the list of active 
workers."
                            (worker-name worker)
                            drv))
                (db-update-build-worker! drv name)
-               (db-update-build-status! drv (build-status started)
-                                        #:log-file log-file)))
+               (update-build-status drv (build-status started)
+                                    #:log-file log-file)))
             (`(build-rejected (drv ,drv) (worker ,name))
              ;; Worker rejected the build, which might be either because the
              ;; derivation is unavailable or because of a transient error.  In
@@ -612,11 +633,11 @@ Use WORKER-DIRECTORY to maintain the list of active 
workers."
                  (begin
                    (log-warning "~a: build rejected: ~a; rescheduling"
                                 name drv)
-                   (db-update-build-status! drv (build-status scheduled)))
+                   (update-build-status drv (build-status scheduled)))
                  (begin
                    (log-warning "~a: build rejected: ~a; canceling"
                                 name drv)
-                   (db-update-build-status! drv (build-status canceled)))))
+                   (update-build-status drv (build-status canceled)))))
             (_
              (log-warning "ignoring unrecognized message: ~s" command)))))
 
@@ -692,6 +713,15 @@ exiting."
       (terminate-helper-processes)
       (primitive-exit 1))))
 
+(define (open-build-notification-socket)
+  "Return a socket connected to the 'cuirass register' process, used to send
+status updates."
+  (let ((sock (socket AF_UNIX
+                      (logior SOCK_STREAM SOCK_CLOEXEC SOCK_NONBLOCK)
+                      0)))
+    (connect sock AF_UNIX (%remote-server-socket-file-name))
+    sock))
+
 (define (cuirass-remote-server args)
   (signal-handler)
   (with-error-handling
@@ -786,24 +816,30 @@ exiting."
 
         (run-fibers
          (lambda ()
-           (with-database
-             (receive-logs log-port (%cache-directory))
-             (spawn-notification-fiber)
-             (spawn-build-log-cleaner (assoc-ref opts 'build-log-expiry))
-
-             (let ((fetch-worker (spawn-fetch-worker))
-                   (worker-directory (spawn-worker-directory)))
-               (catch 'zmq-error
-                 (lambda ()
-                   (serve-build-requests backend-port
-                                         fetch-worker
-                                         worker-directory
-                                         #:build-maintainer
-                                         (spawn-build-maintainer)))
-                 (lambda (key errno message . _)
-                   (log-error (G_ "ZeroMQ error in build server: ~a")
-                              message)
-                   (terminate-helper-processes)
-                   (primitive-exit 1))))))
+           (parameterize ((%notification-socket-pool
+                           (make-resource-pool
+                            (map (lambda (i)
+                                   (open-build-notification-socket))
+                                 (iota 8))
+                            'notification-socket)))
+             (with-database
+               (receive-logs log-port (%cache-directory))
+               (spawn-notification-fiber)
+               (spawn-build-log-cleaner (assoc-ref opts 'build-log-expiry))
+
+               (let ((fetch-worker (spawn-fetch-worker))
+                     (worker-directory (spawn-worker-directory)))
+                 (catch 'zmq-error
+                   (lambda ()
+                     (serve-build-requests backend-port
+                                           fetch-worker
+                                           worker-directory
+                                           #:build-maintainer
+                                           (spawn-build-maintainer)))
+                   (lambda (key errno message . _)
+                     (log-error (G_ "ZeroMQ error in build server: ~a")
+                                message)
+                     (terminate-helper-processes)
+                     (primitive-exit 1)))))))
          #:hz 0
          #:parallelism (min 8 (current-processor-count)))))))
diff --git a/tests/remote.scm b/tests/remote.scm
index 797c92e..3fa9299 100644
--- a/tests/remote.scm
+++ b/tests/remote.scm
@@ -29,6 +29,7 @@
                                  symbol))))
              (cuirass specification)
              ((cuirass remote) #:select (worker-systems))
+             ((cuirass base) #:select (spawn-remote-builder))
              (gnu packages base)
              (guix build utils)
              (guix channels)
@@ -39,6 +40,7 @@
              ((guix store) #:hide (build))
              ((guix utils) #:select (%current-system))
              (tests common)
+             (fibers)
              (squee)
              (simple-zmq)
              ((zlib) #:select (call-with-gzip-input-port))
@@ -57,6 +59,9 @@
 (define worker
   (make-parameter #f))
 
+(define notification-server
+  (make-parameter #f))
+
 (define (start-worker)
   (setenv "REQUEST_PERIOD" "1")
   (setenv "CUIRASS_LOGGING_LEVEL" "debug")
@@ -67,10 +72,12 @@
                    "--public-key=tests/signing-key.pub")
                  #:search-path? #t)))
 
+(define (terminate-process pid)
+  (kill pid SIGINT)
+  (waitpid pid))
+
 (define (stop-worker)
-  (let ((worker (worker)))
-    (kill worker SIGINT)
-    (waitpid worker)))
+  (terminate-process (worker)))
 
 (define (start-server)
   (mkdir-p "tests/cache")
@@ -85,9 +92,30 @@
                  #:search-path? #t)))
 
 (define (stop-server)
-  (let ((server (server)))
-    (kill server SIGINT)
-    (waitpid server)))
+  (terminate-process (server)))
+
+(define (start-notification-server)
+  ;; Spawn the notification server that normally runs as part of 'cuirass
+  ;; register', and which 'cuirass remote-server' connects to.  Do so in a
+  ;; separate process because 'run-fibers' installs suspendable ports, which
+  ;; this process may not be able to deal with.
+  (notification-server
+   (spawn "guile"
+          (list "guile" "-c"
+                (object->string
+                 '(begin
+                    (use-modules (cuirass base) (fibers))
+
+                    (setvbuf (current-output-port) 'none)
+                    (setvbuf (current-error-port) 'none)
+                    (run-fibers
+                     (lambda ()
+                       (spawn-remote-builder)
+                       (sleep 120))               ;wait
+                     #:drain? #t)))))))
+
+(define (stop-notification-server)
+  (terminate-process (notification-server)))
 
 (define* (dummy-drv #:optional sleep #:key (name "foo") dependency message)
   (let ((dependency (and=> dependency read-derivation-from-file)))
@@ -185,6 +213,7 @@
 
   (test-assert "remote-server"
     (begin
+      (start-notification-server)
       (start-server)
       #t))
 
@@ -384,4 +413,5 @@ Failing dependency ~s.\n"
   (test-assert "clean-up"
     (begin
       (stop-worker)
-      (stop-server))))
+      (stop-server)
+      (stop-notification-server))))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]