[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
01/01: DRAFT offload: Use Guile-SSH instead of GNU lsh.
From: |
Ludovic Courtès |
Subject: |
01/01: DRAFT offload: Use Guile-SSH instead of GNU lsh. |
Date: |
Sun, 9 Oct 2016 21:26:11 +0000 (UTC) |
civodul pushed a commit to branch wip-guile-ssh
in repository guix.
commit c2ed7aa15a6ce51134af83d090240cba0aed5543
Author: Ludovic Courtès <address@hidden>
Date: Sun Mar 2 22:39:48 2014 +0100
DRAFT offload: Use Guile-SSH instead of GNU lsh.
* guix/scripts/offload.scm (%lsh-command, %lshg-command)
user-lsh-private-key): Remove.
(user-openssh-private-key): New procedure.
(open-ssh-session): New procedure.
(remote-pipe): Remove 'mode' parameter. Rewrite in terms of
'open-ssh-session' etc. Update users.
(send-files)[missing-files]: Rewrite using the bidirectional channel
port.
(machine-load): Remove exit status logic.
---
guix/scripts/offload.scm | 210 ++++++++++++++++++++--------------------------
1 file changed, 93 insertions(+), 117 deletions(-)
diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm
index b278f1e..0351caf 100644
--- a/guix/scripts/offload.scm
+++ b/guix/scripts/offload.scm
@@ -17,6 +17,10 @@
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
(define-module (guix scripts offload)
+ #:use-module (ssh key)
+ #:use-module (ssh auth)
+ #:use-module (ssh session)
+ #:use-module (ssh channel)
#:use-module (guix config)
#:use-module (guix records)
#:use-module (guix store)
@@ -64,7 +68,7 @@
(system build-machine-system) ; string
(user build-machine-user) ; string
(private-key build-machine-private-key ; file name
- (default (user-lsh-private-key)))
+ (default (user-openssh-private-key)))
(parallel-builds build-machine-parallel-builds ; number
(default 1))
(speed build-machine-speed ; inexact real
@@ -85,19 +89,11 @@
;; File that lists machines available as build slaves.
(string-append %config-directory "/machines.scm"))
-(define %lsh-command
- "lsh")
-
-(define %lshg-command
- ;; FIXME: 'lshg' fails to pass large amounts of data, see
- ;; <http://lists.lysator.liu.se/pipermail/lsh-bugs/2014q1/000639.html>.
- "lsh")
-
-(define (user-lsh-private-key)
- "Return the user's default lsh private key, or #f if it could not be
+(define (user-openssh-private-key)
+ "Return the user's default SSH private key, or #f if it could not be
determined."
(and=> (getenv "HOME")
- (cut string-append <> "/.lsh/identity")))
+ (cut string-append <> "/.ssh/id_rsa")))
(define %user-module
;; Module in which the machine description file is loaded.
@@ -133,60 +129,50 @@ determined."
(leave (_ "failed to load machine file '~a': ~s~%")
file args))))))
-;;; FIXME: The idea was to open the connection to MACHINE once for all, but
-;;; lshg is currently non-functional.
-;; (define (open-ssh-gateway machine)
-;; "Initiate an SSH connection gateway to MACHINE, and return the PID of the
-;; running lsh gateway upon success, or #f on failure."
-;; (catch 'system-error
-;; (lambda ()
-;; (let* ((port (open-pipe* OPEN_READ %lsh-command
-;; "-l" (build-machine-user machine)
-;; "-i" (build-machine-private-key machine)
-;; ;; XXX: With lsh 2.1, passing '--write-pid'
-;; ;; last causes the PID not to be printed.
-;; "--write-pid" "--gateway" "--background"
-;; (build-machine-name machine)))
-;; (line (read-line port))
-;; (status (close-pipe port)))
-;; (if (zero? status)
-;; (let ((pid (string->number line)))
-;; (if (integer? pid)
-;; pid
-;; (begin
-;; (warning (_ "'~a' did not write its PID on stdout: ~s~%")
-;; %lsh-command line)
-;; #f)))
-;; (begin
-;; (warning (_ "failed to initiate SSH connection to '~a':\
-;; '~a' exited with ~a~%")
-;; (build-machine-name machine)
-;; %lsh-command
-;; (status:exit-val status))
-;; #f))))
-;; (lambda args
-;; (leave (_ "failed to execute '~a': ~a~%")
-;; %lsh-command (strerror (system-error-errno args))))))
-
-(define-syntax with-error-to-port
- (syntax-rules ()
- ((_ port exp0 exp ...)
- (let ((new port)
- (old (current-error-port)))
- (dynamic-wind
- (lambda ()
- (set-current-error-port new))
- (lambda ()
- exp0 exp ...)
- (lambda ()
- (set-current-error-port old)))))))
-
-(define* (remote-pipe machine mode command
- #:key (error-port (current-error-port)) (quote? #t))
- "Run COMMAND (a string list) on MACHINE, assuming an lsh gateway has been
-set up. When QUOTE? is true, perform shell-quotation of all the elements of
-COMMAND. Return either a pipe opened with MODE, or #f if the lsh client could
-not be started."
+(define (open-ssh-session machine)
+ "Open an SSH session for MACHINE and return it. Throw an error on failure."
+ (let ((private (private-key-from-file (build-machine-private-key machine)))
+ (public (public-key-from-file
+ (string-append (build-machine-private-key machine)
+ ".pub")))
+ (session (make-session #:user (build-machine-user machine)
+ #:host (build-machine-name machine)
+ ;; #:log-verbosity 'protocol
+ #:identity (build-machine-private-key machine)
+ #:compression-level #f)))
+ (connect! session)
+
+ ;; TODO: Use of known_hosts currently seems to be failing, and we'd prefer
+ ;; a 'server-public-key' field in 'build-machine'.
+ (let* ((auth (authenticate-server session))
+ (server (get-server-public-key session))
+ (digest (get-public-key-hash server 'sha1)))
+ (unless (eq? 'ok auth)
+ ;; FIXME: This should be an error we sometimes get 'found-other'
+ ;; because 'known_hosts' contains an ecdh-sha2-nistp256 key for the
+ ;; server whereas here we receive an ssh-rsa key, the reason being
+ ;; that libssh supports fewer algorithms than OpenSSH. It's OKish to
+ ;; ignore the error because we have a higher-level authentication
+ ;; layer for archives anyway.
+ (warning (_ "failed to authenticate server at '~a' with key ~a: ~s~%")
+ (build-machine-name machine)
+ (bytevector->base16-string digest)
+ auth)))
+ (let ((auth (userauth-public-key! session private)))
+ (unless (eq? 'success auth)
+ (leave (_ "SSH public key authentication failed: ~a~%")
+ (get-error session))))
+
+ session))
+
+(define %channel->session
+ ;; Mapping of SSH channels to their corresponding session.
+ (make-weak-key-hash-table))
+
+(define* (remote-pipe machine command
+ #:key (quote? #t))
+ "Run COMMAND (a list) on MACHINE, and return an open input/output port.
+When QUOTE? is true, perform shell-quotation of all the elements of COMMAND."
(define (shell-quote str)
;; Sort-of shell-quote STR so it can be passed as an argument to the
;; shell.
@@ -194,20 +180,19 @@ not be started."
(lambda ()
(write str))))
- ;; Let the child inherit ERROR-PORT.
- (with-error-to-port error-port
- (apply open-pipe* mode %lshg-command
- "-l" (build-machine-user machine)
- "-p" (number->string (build-machine-port machine))
-
- ;; XXX: Remove '-i' when %LSHG-COMMAND really is lshg.
- "-i" (build-machine-private-key machine)
+ ;; TODO: Use (ssh popen) instead.
+ (let* ((session (open-ssh-session machine))
+ (channel (make-channel session)))
+ ;; XXX: Work around a GC bug with Guile-SSH 0.6.0: make sure SESSION
+ ;; remains alive as long as CHANNEL is alive.
+ ;; (hashq-set! %channel->session channel session)
- (append (build-machine-ssh-options machine)
- (list (build-machine-name machine))
- (if quote?
- (map shell-quote command)
- command)))))
+ (channel-open-session channel)
+ (channel-request-exec channel
+ (string-join (if quote?
+ (map shell-quote command)
+ command)))
+ channel))
;;;
@@ -334,10 +319,11 @@ hook."
(unless (= EEXIST (system-error-errno args))
(apply throw args)))))))
- (let ((pipe (remote-pipe machine OPEN_READ
+ (let ((pipe (remote-pipe machine
`("guile" "-c" ,(object->string script)))))
(get-string-all pipe)
- (let ((status (close-pipe pipe)))
+ (let ((status (channel-get-exit-status pipe)))
+ ;; FIXME: Check exit code.
(unless (zero? status)
;; Better be safe than sorry: if we ignore the error here, then FILE
;; may be GC'd just before we start using it.
@@ -366,10 +352,10 @@ hook."
(false-if-exception (delete-file file)))
roots)))))
- (let ((pipe (remote-pipe machine OPEN_READ
+ (let ((pipe (remote-pipe machine
`("guile" "-c" ,(object->string script)))))
(get-string-all pipe)
- (close-pipe pipe)))
+ (close pipe)))
(define* (offload drv machine
#:key print-build-trace? (max-silent-time 3600)
@@ -383,7 +369,7 @@ there, and write the build log to LOG-PORT. Return the
exit status."
;; Normally DRV has already been protected from GC when it was transferred.
;; The '-r' flag below prevents the build result from being GC'd.
- (let ((pipe (remote-pipe machine OPEN_READ
+ (let ((pipe (remote-pipe machine
`("guix" "build"
"-r" ,%gc-root-file
,(format #f "--max-silent-time=~a"
@@ -396,14 +382,15 @@ there, and write the build log to LOG-PORT. Return the
exit status."
;; Since 'guix build' writes the build log to its
;; stderr, everything will go directly to LOG-PORT.
- #:error-port log-port)))
+ ;; #:error-port log-port ;; FIXME
+ )))
(let loop ((line (read-line pipe)))
(unless (eof-object? line)
(display line log-port)
(newline log-port)
(loop (read-line pipe))))
- (close-pipe pipe)))
+ (close-port pipe)))
(define* (transfer-and-offload drv machine
#:key
@@ -447,24 +434,16 @@ with exit code ~a~%"
"Send the subset of FILES that's missing to MACHINE's store. Return #t on
success, #f otherwise."
(define (missing-files files)
- ;; Return the subset of FILES not already on MACHINE.
- (let*-values (((files)
- (format #f "~{~a~%~}" files))
- ((missing pids)
- (filtered-port
- (append (list (which %lshg-command)
- "-l" (build-machine-user machine)
- "-p" (number->string
- (build-machine-port machine))
- "-i" (build-machine-private-key machine))
- (build-machine-ssh-options machine)
- (cons (build-machine-name machine)
- '("guix" "archive" "--missing")))
- (open-input-string files)))
- ((result)
- (get-string-all missing)))
- (for-each waitpid pids)
- (string-tokenize result)))
+ ;; Return the subset of FILES not already on MACHINE. Use 'head' as a
+ ;; hack to make sure the remote end stops reading when we're done.
+ (let* ((pipe (remote-pipe machine
+ `("head" ,(string-append "-"
+ (number->string
+ (length files)))
+ "|" "guix" "archive" "--missing")
+ #:quote? #f)))
+ (format pipe "~{~a~%~}" files)
+ (string-tokenize (get-string-all pipe))))
(with-store store
(guard (c ((nix-protocol-error? c)
@@ -481,12 +460,13 @@ success, #f otherwise."
;; than xz: For a compression ratio 2 times larger, it is 20 times
;; faster.
(let* ((files (missing-files (topologically-sorted store files)))
- (pipe (remote-pipe machine OPEN_WRITE
+ (pipe (remote-pipe machine
'("gzip" "-dc" "|"
"guix" "archive" "--import")
#:quote? #f)))
(format #t (_ "sending ~a store files to '~a'...~%")
(length files) (build-machine-name machine))
+ ;; FIXME: PIPE is not a file port, so the following breaks.
(call-with-compressed-output-port 'gzip pipe
(lambda (compressed)
(catch 'system-error
@@ -498,15 +478,16 @@ success, #f otherwise."
(strerror (system-error-errno args))))))
#:options '("--fast"))
- ;; Wait for the 'lsh' process to complete.
- (zero? (close-pipe pipe))))))
+ ;; Wait for the remote process to complete.
+ (close pipe)
+ #t))))
(define (retrieve-files files machine)
"Retrieve FILES from MACHINE's store, and import them."
(define host
(build-machine-name machine))
- (let ((pipe (remote-pipe machine OPEN_READ
+ (let ((pipe (remote-pipe machine
`("guix" "archive" "--export" ,@files
"|" "xz" "-c")
#:quote? #f)))
@@ -527,8 +508,7 @@ success, #f otherwise."
#:log-port (current-error-port)
#:lock? #f)))
- ;; Wait for the 'lsh' process to complete.
- (zero? (close-pipe pipe)))))))
+ (close-port pipe))))))
;;;
@@ -546,13 +526,9 @@ success, #f otherwise."
(define (machine-load machine)
"Return the load of MACHINE, divided by the number of parallel builds
allowed on MACHINE."
- (let* ((pipe (remote-pipe machine OPEN_READ `("cat" "/proc/loadavg")))
- (line (read-line pipe))
- (status (close-pipe pipe)))
- (unless (eqv? 0 (status:exit-val status))
- (warning (_ "failed to obtain load of '~a': SSH client exited with ~a~%")
- (build-machine-name machine)
- (status:exit-val status)))
+ (let* ((pipe (remote-pipe machine '("cat" "/proc/loadavg")))
+ (line (read-line pipe)))
+ (close-port pipe)
(if (eof-object? line)
+inf.0 ;MACHINE does not respond, so assume it is infinitely loaded