civodul pushed a commit to branch wip-guile-ssh in repository guix. commit c2ed7aa15a6ce51134af83d090240cba0aed5543 Author: Ludovic Courtès <l...@gnu.org> Date: Sun Mar 2 22:39:48 2014 +0100
DRAFT offload: Use Guile-SSH instead of GNU lsh. * guix/scripts/offload.scm (%lsh-command, %lshg-command) user-lsh-private-key): Remove. (user-openssh-private-key): New procedure. (open-ssh-session): New procedure. (remote-pipe): Remove 'mode' parameter. Rewrite in terms of 'open-ssh-session' etc. Update users. (send-files)[missing-files]: Rewrite using the bidirectional channel port. (machine-load): Remove exit status logic. --- guix/scripts/offload.scm | 210 ++++++++++++++++++++-------------------------- 1 file changed, 93 insertions(+), 117 deletions(-) diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm index b278f1e..0351caf 100644 --- a/guix/scripts/offload.scm +++ b/guix/scripts/offload.scm @@ -17,6 +17,10 @@ ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. (define-module (guix scripts offload) + #:use-module (ssh key) + #:use-module (ssh auth) + #:use-module (ssh session) + #:use-module (ssh channel) #:use-module (guix config) #:use-module (guix records) #:use-module (guix store) @@ -64,7 +68,7 @@ (system build-machine-system) ; string (user build-machine-user) ; string (private-key build-machine-private-key ; file name - (default (user-lsh-private-key))) + (default (user-openssh-private-key))) (parallel-builds build-machine-parallel-builds ; number (default 1)) (speed build-machine-speed ; inexact real @@ -85,19 +89,11 @@ ;; File that lists machines available as build slaves. (string-append %config-directory "/machines.scm")) -(define %lsh-command - "lsh") - -(define %lshg-command - ;; FIXME: 'lshg' fails to pass large amounts of data, see - ;; <http://lists.lysator.liu.se/pipermail/lsh-bugs/2014q1/000639.html>. - "lsh") - -(define (user-lsh-private-key) - "Return the user's default lsh private key, or #f if it could not be +(define (user-openssh-private-key) + "Return the user's default SSH private key, or #f if it could not be determined." (and=> (getenv "HOME") - (cut string-append <> "/.lsh/identity"))) + (cut string-append <> "/.ssh/id_rsa"))) (define %user-module ;; Module in which the machine description file is loaded. @@ -133,60 +129,50 @@ determined." (leave (_ "failed to load machine file '~a': ~s~%") file args)))))) -;;; FIXME: The idea was to open the connection to MACHINE once for all, but -;;; lshg is currently non-functional. -;; (define (open-ssh-gateway machine) -;; "Initiate an SSH connection gateway to MACHINE, and return the PID of the -;; running lsh gateway upon success, or #f on failure." -;; (catch 'system-error -;; (lambda () -;; (let* ((port (open-pipe* OPEN_READ %lsh-command -;; "-l" (build-machine-user machine) -;; "-i" (build-machine-private-key machine) -;; ;; XXX: With lsh 2.1, passing '--write-pid' -;; ;; last causes the PID not to be printed. -;; "--write-pid" "--gateway" "--background" -;; (build-machine-name machine))) -;; (line (read-line port)) -;; (status (close-pipe port))) -;; (if (zero? status) -;; (let ((pid (string->number line))) -;; (if (integer? pid) -;; pid -;; (begin -;; (warning (_ "'~a' did not write its PID on stdout: ~s~%") -;; %lsh-command line) -;; #f))) -;; (begin -;; (warning (_ "failed to initiate SSH connection to '~a':\ -;; '~a' exited with ~a~%") -;; (build-machine-name machine) -;; %lsh-command -;; (status:exit-val status)) -;; #f)))) -;; (lambda args -;; (leave (_ "failed to execute '~a': ~a~%") -;; %lsh-command (strerror (system-error-errno args)))))) - -(define-syntax with-error-to-port - (syntax-rules () - ((_ port exp0 exp ...) - (let ((new port) - (old (current-error-port))) - (dynamic-wind - (lambda () - (set-current-error-port new)) - (lambda () - exp0 exp ...) - (lambda () - (set-current-error-port old))))))) - -(define* (remote-pipe machine mode command - #:key (error-port (current-error-port)) (quote? #t)) - "Run COMMAND (a string list) on MACHINE, assuming an lsh gateway has been -set up. When QUOTE? is true, perform shell-quotation of all the elements of -COMMAND. Return either a pipe opened with MODE, or #f if the lsh client could -not be started." +(define (open-ssh-session machine) + "Open an SSH session for MACHINE and return it. Throw an error on failure." + (let ((private (private-key-from-file (build-machine-private-key machine))) + (public (public-key-from-file + (string-append (build-machine-private-key machine) + ".pub"))) + (session (make-session #:user (build-machine-user machine) + #:host (build-machine-name machine) + ;; #:log-verbosity 'protocol + #:identity (build-machine-private-key machine) + #:compression-level #f))) + (connect! session) + + ;; TODO: Use of known_hosts currently seems to be failing, and we'd prefer + ;; a 'server-public-key' field in 'build-machine'. + (let* ((auth (authenticate-server session)) + (server (get-server-public-key session)) + (digest (get-public-key-hash server 'sha1))) + (unless (eq? 'ok auth) + ;; FIXME: This should be an error we sometimes get 'found-other' + ;; because 'known_hosts' contains an ecdh-sha2-nistp256 key for the + ;; server whereas here we receive an ssh-rsa key, the reason being + ;; that libssh supports fewer algorithms than OpenSSH. It's OKish to + ;; ignore the error because we have a higher-level authentication + ;; layer for archives anyway. + (warning (_ "failed to authenticate server at '~a' with key ~a: ~s~%") + (build-machine-name machine) + (bytevector->base16-string digest) + auth))) + (let ((auth (userauth-public-key! session private))) + (unless (eq? 'success auth) + (leave (_ "SSH public key authentication failed: ~a~%") + (get-error session)))) + + session)) + +(define %channel->session + ;; Mapping of SSH channels to their corresponding session. + (make-weak-key-hash-table)) + +(define* (remote-pipe machine command + #:key (quote? #t)) + "Run COMMAND (a list) on MACHINE, and return an open input/output port. +When QUOTE? is true, perform shell-quotation of all the elements of COMMAND." (define (shell-quote str) ;; Sort-of shell-quote STR so it can be passed as an argument to the ;; shell. @@ -194,20 +180,19 @@ not be started." (lambda () (write str)))) - ;; Let the child inherit ERROR-PORT. - (with-error-to-port error-port - (apply open-pipe* mode %lshg-command - "-l" (build-machine-user machine) - "-p" (number->string (build-machine-port machine)) - - ;; XXX: Remove '-i' when %LSHG-COMMAND really is lshg. - "-i" (build-machine-private-key machine) + ;; TODO: Use (ssh popen) instead. + (let* ((session (open-ssh-session machine)) + (channel (make-channel session))) + ;; XXX: Work around a GC bug with Guile-SSH 0.6.0: make sure SESSION + ;; remains alive as long as CHANNEL is alive. + ;; (hashq-set! %channel->session channel session) - (append (build-machine-ssh-options machine) - (list (build-machine-name machine)) - (if quote? - (map shell-quote command) - command))))) + (channel-open-session channel) + (channel-request-exec channel + (string-join (if quote? + (map shell-quote command) + command))) + channel)) ;;; @@ -334,10 +319,11 @@ hook." (unless (= EEXIST (system-error-errno args)) (apply throw args))))))) - (let ((pipe (remote-pipe machine OPEN_READ + (let ((pipe (remote-pipe machine `("guile" "-c" ,(object->string script))))) (get-string-all pipe) - (let ((status (close-pipe pipe))) + (let ((status (channel-get-exit-status pipe))) + ;; FIXME: Check exit code. (unless (zero? status) ;; Better be safe than sorry: if we ignore the error here, then FILE ;; may be GC'd just before we start using it. @@ -366,10 +352,10 @@ hook." (false-if-exception (delete-file file))) roots))))) - (let ((pipe (remote-pipe machine OPEN_READ + (let ((pipe (remote-pipe machine `("guile" "-c" ,(object->string script))))) (get-string-all pipe) - (close-pipe pipe))) + (close pipe))) (define* (offload drv machine #:key print-build-trace? (max-silent-time 3600) @@ -383,7 +369,7 @@ there, and write the build log to LOG-PORT. Return the exit status." ;; Normally DRV has already been protected from GC when it was transferred. ;; The '-r' flag below prevents the build result from being GC'd. - (let ((pipe (remote-pipe machine OPEN_READ + (let ((pipe (remote-pipe machine `("guix" "build" "-r" ,%gc-root-file ,(format #f "--max-silent-time=~a" @@ -396,14 +382,15 @@ there, and write the build log to LOG-PORT. Return the exit status." ;; Since 'guix build' writes the build log to its ;; stderr, everything will go directly to LOG-PORT. - #:error-port log-port))) + ;; #:error-port log-port ;; FIXME + ))) (let loop ((line (read-line pipe))) (unless (eof-object? line) (display line log-port) (newline log-port) (loop (read-line pipe)))) - (close-pipe pipe))) + (close-port pipe))) (define* (transfer-and-offload drv machine #:key @@ -447,24 +434,16 @@ with exit code ~a~%" "Send the subset of FILES that's missing to MACHINE's store. Return #t on success, #f otherwise." (define (missing-files files) - ;; Return the subset of FILES not already on MACHINE. - (let*-values (((files) - (format #f "~{~a~%~}" files)) - ((missing pids) - (filtered-port - (append (list (which %lshg-command) - "-l" (build-machine-user machine) - "-p" (number->string - (build-machine-port machine)) - "-i" (build-machine-private-key machine)) - (build-machine-ssh-options machine) - (cons (build-machine-name machine) - '("guix" "archive" "--missing"))) - (open-input-string files))) - ((result) - (get-string-all missing))) - (for-each waitpid pids) - (string-tokenize result))) + ;; Return the subset of FILES not already on MACHINE. Use 'head' as a + ;; hack to make sure the remote end stops reading when we're done. + (let* ((pipe (remote-pipe machine + `("head" ,(string-append "-" + (number->string + (length files))) + "|" "guix" "archive" "--missing") + #:quote? #f))) + (format pipe "~{~a~%~}" files) + (string-tokenize (get-string-all pipe)))) (with-store store (guard (c ((nix-protocol-error? c) @@ -481,12 +460,13 @@ success, #f otherwise." ;; than xz: For a compression ratio 2 times larger, it is 20 times ;; faster. (let* ((files (missing-files (topologically-sorted store files))) - (pipe (remote-pipe machine OPEN_WRITE + (pipe (remote-pipe machine '("gzip" "-dc" "|" "guix" "archive" "--import") #:quote? #f))) (format #t (_ "sending ~a store files to '~a'...~%") (length files) (build-machine-name machine)) + ;; FIXME: PIPE is not a file port, so the following breaks. (call-with-compressed-output-port 'gzip pipe (lambda (compressed) (catch 'system-error @@ -498,15 +478,16 @@ success, #f otherwise." (strerror (system-error-errno args)))))) #:options '("--fast")) - ;; Wait for the 'lsh' process to complete. - (zero? (close-pipe pipe)))))) + ;; Wait for the remote process to complete. + (close pipe) + #t)))) (define (retrieve-files files machine) "Retrieve FILES from MACHINE's store, and import them." (define host (build-machine-name machine)) - (let ((pipe (remote-pipe machine OPEN_READ + (let ((pipe (remote-pipe machine `("guix" "archive" "--export" ,@files "|" "xz" "-c") #:quote? #f))) @@ -527,8 +508,7 @@ success, #f otherwise." #:log-port (current-error-port) #:lock? #f))) - ;; Wait for the 'lsh' process to complete. - (zero? (close-pipe pipe))))))) + (close-port pipe)))))) ;;; @@ -546,13 +526,9 @@ success, #f otherwise." (define (machine-load machine) "Return the load of MACHINE, divided by the number of parallel builds allowed on MACHINE." - (let* ((pipe (remote-pipe machine OPEN_READ `("cat" "/proc/loadavg"))) - (line (read-line pipe)) - (status (close-pipe pipe))) - (unless (eqv? 0 (status:exit-val status)) - (warning (_ "failed to obtain load of '~a': SSH client exited with ~a~%") - (build-machine-name machine) - (status:exit-val status))) + (let* ((pipe (remote-pipe machine '("cat" "/proc/loadavg"))) + (line (read-line pipe))) + (close-port pipe) (if (eof-object? line) +inf.0 ;MACHINE does not respond, so assume it is infinitely loaded