[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[no subject]
From: |
Ludovic Courtès |
Date: |
Thu, 9 Jan 2025 06:46:54 -0500 (EST) |
branch: wip-event-logging
commit d81fa497bfd3690f061846e06bf1d9c0e43b1865
Author: Ludovic Courtès <ludo@gnu.org>
AuthorDate: Thu Jan 9 12:39:53 2025 +0100
base: ‘remote-builder’ listens for notifications from ‘cuirass
remote-server’.
* src/cuirass/base.scm (%remote-server-socket-file-name): New variable.
(remote-builder-listener): New procedure.
(remote-builder): Add ‘socket’ parameter. Call
‘remote-builder-listener’. Handle ‘build-status-change’ messages.
(spawn-remote-builder): Add call to ‘open-unix-listening-socket’ and
pass it to ‘remote-builder’.
* src/cuirass/scripts/remote-server.scm (%notification-socket-pool): New
variable.
(send-build-status-change-notification, update-build-status): New
procedures.
(run-fetch, serve-build-requests): Call ‘update-build-status’ instead of
‘db-update-build-status!’ and ‘set-build-successful!’.
(open-build-notification-socket): New procedure.
(cuirass-remote-server): Use it and parameterize
‘%notification-socket-pool’.
* tests/remote.scm (notification-server): New variable.
(terminate-process): New procedure.
(stop-worker, stop-server): Use it.
(start-notification-server, stop-notification-server): New procedures.
("remote-server"): Call ‘start-notification-server’.
("clean-up"): Call ‘stop-notification-server’.
---
src/cuirass/base.scm | 53 +++++++++++++++++--
src/cuirass/scripts/remote-server.scm | 98 ++++++++++++++++++++++++-----------
tests/remote.scm | 44 +++++++++++++---
3 files changed, 152 insertions(+), 43 deletions(-)
diff --git a/src/cuirass/base.scm b/src/cuirass/base.scm
index 3bc9796..c5d7eb2 100644
--- a/src/cuirass/base.scm
+++ b/src/cuirass/base.scm
@@ -89,6 +89,7 @@
;; Parameters.
%bridge-socket-file-name
+ %remote-server-socket-file-name
%package-cachedir
%fallback?))
@@ -165,9 +166,17 @@
(string-append (%cuirass-state-directory) "/run"))))
(define %bridge-socket-file-name
+ ;; Socket 'cuirass register' listens to, allowing 'cuirass web' to connect
+ ;; to it.
(make-parameter (string-append (%cuirass-run-state-directory)
"/cuirass/bridge")))
+(define %remote-server-socket-file-name
+ ;; Socket 'cuirass register' listens to, allowing 'cuirass remote-server' to
+ ;; connect to it.
+ (make-parameter (string-append (%cuirass-run-state-directory)
+ "/cuirass/remote-builds")))
+
;;;
;;; Read parameters.
@@ -446,22 +455,56 @@ by handing them to the local build daemon."
(spawn-fiber (local-builder channel))
channel))
-(define (remote-builder channel)
+(define (remote-builder-listener socket channel)
+ "Spawn a server that accepts connections on SOCKET and forwards messages it
+reads to CHANNEL."
+ (define (serve-client client)
+ (let loop ()
+ (match (read client)
+ ((? eof-object?)
+ (log-info "terminating remote server client connection on EOF"))
+ (message
+ (put-message channel message)
+ (loop)))))
+
+ (lambda ()
+ (let loop ()
+ (match (accept socket (logior SOCK_NONBLOCK SOCK_CLOEXEC))
+ ((connection . peer)
+ (spawn-fiber (lambda ()
+ (log-info "remote builder accepted connection: ~s"
+ connection)
+ (serve-client connection)))
+ (loop))))))
+
+(define (remote-builder channel socket)
+ "Spawn a remote builder that accepts messages on CHANNEL and receives
+notifications from 'cuirass remote-server' over SOCKET."
(lambda ()
(log-info "builds will be delegated to 'cuirass remote-server'")
+ (spawn-fiber (remote-builder-listener socket channel))
+
(let loop ()
(match (get-message channel)
(`(build ,derivations)
;; Currently there's nothing to do here: 'cuirass remote-server'
;; periodically calls 'db-get-pending-build'.
;; TODO: Push notifications to 'remote-server' instead.
- (log-info "~a pending derivation builds" (length derivations))))
+ (log-info "~a pending derivation builds" (length derivations)))
+ (`(build-status-change ,derivation ,status)
+ ;; TODO: Handle database operations, notifications, etc. from here.
+ (log-info "status of '~a' changed to ~a" derivation status)))
(loop))))
(define (spawn-remote-builder)
- "Spawn a build actor that performs builds using \"remote workers\"."
- (let ((channel (make-channel)))
- (spawn-fiber (remote-builder channel))
+ "Spawn a build actor that performs builds using \"remote workers\". Return
+once ready to listen for incoming connections from 'cuirass remote-server'."
+ (log-info "listening for 'cuirass remote-server' notifications on '~a'"
+ (%remote-server-socket-file-name))
+ (let ((channel (make-channel))
+ (socket (open-unix-listening-socket
+ (%remote-server-socket-file-name))))
+ (spawn-fiber (remote-builder channel socket))
channel))
diff --git a/src/cuirass/scripts/remote-server.scm
b/src/cuirass/scripts/remote-server.scm
index caeb8ed..ec4f55f 100644
--- a/src/cuirass/scripts/remote-server.scm
+++ b/src/cuirass/scripts/remote-server.scm
@@ -1,6 +1,6 @@
;;; remote-server.scm -- Remote build server.
;;; Copyright © 2020, 2021 Mathieu Othacehe <othacehe@gnu.org>
-;;; Copyright © 2023, 2024 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2023-2025 Ludovic Courtès <ludo@gnu.org>
;;;
;;; This file is part of Cuirass.
;;;
@@ -20,7 +20,8 @@
(define-module (cuirass scripts remote-server)
#:autoload (cuirass base) (read-parameters
set-build-successful!
- spawn-build-maintainer)
+ spawn-build-maintainer
+ %remote-server-socket-file-name)
#:use-module (cuirass config)
#:use-module (cuirass database)
#:use-module (cuirass logging)
@@ -87,6 +88,10 @@
(define %publish-port
(make-parameter #f))
+(define %notification-socket-pool
+ ;; Pool of sockets connected to 'cuirass register'.
+ (make-parameter #f))
+
(define service-name
"Cuirass remote server")
@@ -281,6 +286,22 @@ signing key for URL is not authorized."
#t))))
outputs))))
+(define (send-build-status-change-notification drv status)
+ "Send a notification to 'cuirass register' that the status of DRV changed to
+STATUS."
+ (with-resource-from-pool (%notification-socket-pool) sock
+ (write `(build-status-change ,drv ,status) sock)
+ (newline sock)))
+
+(define* (update-build-status drv status
+ #:key log-file)
+ "Change the status of DRV to STATUS, both in the database and by sending a
+notification to 'cuirass register'."
+ (if (= (build-status succeeded) status)
+ (set-build-successful! drv)
+ (db-update-build-status! drv status #:log-file log-file))
+ (send-build-status-change-notification drv status))
+
(define* (run-fetch message)
"Read MESSAGE and download the corresponding build outputs. If
%CACHE-DIRECTORY is set, download the matching NAR and NARINFO files in this
@@ -304,15 +325,15 @@ directory."
(add-to-store drv outputs url))
(begin
(log-info "build succeeded: '~a'" drv)
- (set-build-successful! drv))
+ (update-build-status drv (build-status succeeded)))
(begin
(log-error "failed to retrieve output of \
successful build '~a'; rescheduling"
drv)
- (db-update-build-status! drv (build-status scheduled))))))
+ (update-build-status drv (build-status scheduled))))))
(('build-failed ('drv drv) ('url url) _ ...)
(log-info "build failed: '~a'" drv)
- (db-update-build-status! drv (build-status failed)))))
+ (update-build-status drv (build-status failed)))))
(define (fetch-worker channel max-parallel-downloads)
(define queue-size
@@ -522,7 +543,7 @@ Use WORKER-DIRECTORY to maintain the list of active
workers."
('dependency dependency) _ ...)
(log-info "build failed: dependency '~a' of '~a'"
dependency drv)
- (db-update-build-status! drv (build-status failed-dependency))
+ (update-build-status drv (build-status failed-dependency))
(put-message build-maintainer 'failed-dependency) ;mark
'failed-dependency' builds
(let ((parent (db-get-build drv)))
(when parent
@@ -560,7 +581,7 @@ Use WORKER-DIRECTORY to maintain the list of active
workers."
(build-id build)
derivation (build-system build)))
(db-update-build-worker! derivation name)
- (db-update-build-status! derivation (build-status
submitted))
+ (update-build-status derivation (build-status
submitted))
(catch 'zmq-error
(lambda ()
(reply-worker
@@ -576,7 +597,7 @@ Use WORKER-DIRECTORY to maintain the list of active
workers."
(worker-name worker)
(worker-address worker)
message)
- (db-update-build-status! derivation
+ (update-build-status derivation
(build-status
scheduled)))))
(begin
(when worker
@@ -598,8 +619,8 @@ Use WORKER-DIRECTORY to maintain the list of active
workers."
(worker-name worker)
drv))
(db-update-build-worker! drv name)
- (db-update-build-status! drv (build-status started)
- #:log-file log-file)))
+ (update-build-status drv (build-status started)
+ #:log-file log-file)))
(`(build-rejected (drv ,drv) (worker ,name))
;; Worker rejected the build, which might be either because the
;; derivation is unavailable or because of a transient error. In
@@ -612,11 +633,11 @@ Use WORKER-DIRECTORY to maintain the list of active
workers."
(begin
(log-warning "~a: build rejected: ~a; rescheduling"
name drv)
- (db-update-build-status! drv (build-status scheduled)))
+ (update-build-status drv (build-status scheduled)))
(begin
(log-warning "~a: build rejected: ~a; canceling"
name drv)
- (db-update-build-status! drv (build-status canceled)))))
+ (update-build-status drv (build-status canceled)))))
(_
(log-warning "ignoring unrecognized message: ~s" command)))))
@@ -692,6 +713,15 @@ exiting."
(terminate-helper-processes)
(primitive-exit 1))))
+(define (open-build-notification-socket)
+ "Return a socket connected to the 'cuirass register' process, used to send
+status updates."
+ (let ((sock (socket AF_UNIX
+ (logior SOCK_STREAM SOCK_CLOEXEC SOCK_NONBLOCK)
+ 0)))
+ (connect sock AF_UNIX (%remote-server-socket-file-name))
+ sock))
+
(define (cuirass-remote-server args)
(signal-handler)
(with-error-handling
@@ -786,24 +816,30 @@ exiting."
(run-fibers
(lambda ()
- (with-database
- (receive-logs log-port (%cache-directory))
- (spawn-notification-fiber)
- (spawn-build-log-cleaner (assoc-ref opts 'build-log-expiry))
-
- (let ((fetch-worker (spawn-fetch-worker))
- (worker-directory (spawn-worker-directory)))
- (catch 'zmq-error
- (lambda ()
- (serve-build-requests backend-port
- fetch-worker
- worker-directory
- #:build-maintainer
- (spawn-build-maintainer)))
- (lambda (key errno message . _)
- (log-error (G_ "ZeroMQ error in build server: ~a")
- message)
- (terminate-helper-processes)
- (primitive-exit 1))))))
+ (parameterize ((%notification-socket-pool
+ (make-resource-pool
+ (map (lambda (i)
+ (open-build-notification-socket))
+ (iota 8))
+ 'notification-socket)))
+ (with-database
+ (receive-logs log-port (%cache-directory))
+ (spawn-notification-fiber)
+ (spawn-build-log-cleaner (assoc-ref opts 'build-log-expiry))
+
+ (let ((fetch-worker (spawn-fetch-worker))
+ (worker-directory (spawn-worker-directory)))
+ (catch 'zmq-error
+ (lambda ()
+ (serve-build-requests backend-port
+ fetch-worker
+ worker-directory
+ #:build-maintainer
+ (spawn-build-maintainer)))
+ (lambda (key errno message . _)
+ (log-error (G_ "ZeroMQ error in build server: ~a")
+ message)
+ (terminate-helper-processes)
+ (primitive-exit 1)))))))
#:hz 0
#:parallelism (min 8 (current-processor-count)))))))
diff --git a/tests/remote.scm b/tests/remote.scm
index 797c92e..3fa9299 100644
--- a/tests/remote.scm
+++ b/tests/remote.scm
@@ -29,6 +29,7 @@
symbol))))
(cuirass specification)
((cuirass remote) #:select (worker-systems))
+ ((cuirass base) #:select (spawn-remote-builder))
(gnu packages base)
(guix build utils)
(guix channels)
@@ -39,6 +40,7 @@
((guix store) #:hide (build))
((guix utils) #:select (%current-system))
(tests common)
+ (fibers)
(squee)
(simple-zmq)
((zlib) #:select (call-with-gzip-input-port))
@@ -57,6 +59,9 @@
(define worker
(make-parameter #f))
+(define notification-server
+ (make-parameter #f))
+
(define (start-worker)
(setenv "REQUEST_PERIOD" "1")
(setenv "CUIRASS_LOGGING_LEVEL" "debug")
@@ -67,10 +72,12 @@
"--public-key=tests/signing-key.pub")
#:search-path? #t)))
+(define (terminate-process pid)
+ (kill pid SIGINT)
+ (waitpid pid))
+
(define (stop-worker)
- (let ((worker (worker)))
- (kill worker SIGINT)
- (waitpid worker)))
+ (terminate-process (worker)))
(define (start-server)
(mkdir-p "tests/cache")
@@ -85,9 +92,30 @@
#:search-path? #t)))
(define (stop-server)
- (let ((server (server)))
- (kill server SIGINT)
- (waitpid server)))
+ (terminate-process (server)))
+
+(define (start-notification-server)
+ ;; Spawn the notification server that normally runs as part of 'cuirass
+ ;; register', and which 'cuirass remote-server' connects to. Do so in a
+ ;; separate process because 'run-fibers' installs suspendable ports, which
+ ;; this process may not be able to deal with.
+ (notification-server
+ (spawn "guile"
+ (list "guile" "-c"
+ (object->string
+ '(begin
+ (use-modules (cuirass base) (fibers))
+
+ (setvbuf (current-output-port) 'none)
+ (setvbuf (current-error-port) 'none)
+ (run-fibers
+ (lambda ()
+ (spawn-remote-builder)
+ (sleep 120)) ;wait
+ #:drain? #t)))))))
+
+(define (stop-notification-server)
+ (terminate-process (notification-server)))
(define* (dummy-drv #:optional sleep #:key (name "foo") dependency message)
(let ((dependency (and=> dependency read-derivation-from-file)))
@@ -185,6 +213,7 @@
(test-assert "remote-server"
(begin
+ (start-notification-server)
(start-server)
#t))
@@ -384,4 +413,5 @@ Failing dependency ~s.\n"
(test-assert "clean-up"
(begin
(stop-worker)
- (stop-server))))
+ (stop-server)
+ (stop-notification-server))))