civodul pushed a commit to branch main
in repository shepherd.

commit 787d5a33aea061b5052faa0863c96be722440ce3
Author: Ludovic Courtès <[email protected]>
AuthorDate: Sun Apr 13 19:09:08 2025 +0200

    shepherd: Exit if another instance is listening on the socket.
    
    Partly fixes <https://issues.guix.gnu.org/76998>.
    
    * modules/shepherd/comm.scm (open-server-socket): Add #:false-if-in-use?
    and honor it.
    * modules/shepherd.scm (call-with-server-socket): Pass #:false-if-in-use?
    and exit when ‘open-server-socket’ returns #f.
    * tests/basic.sh: Test it.
    * NEWS: Update.
    
    Reported-by: Daniel Littlewood <[email protected]>
    Reported-by: Jake <[email protected]>
    Reported-by: Danny Milosavljevic <[email protected]>
---
 NEWS                      |  8 ++++++++
 modules/shepherd.scm      | 11 ++++++++++-
 modules/shepherd/comm.scm | 34 ++++++++++++++++++++++++++++------
 tests/basic.sh            |  4 ++++
 4 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/NEWS b/NEWS
index f843b62..96bfd70 100644
--- a/NEWS
+++ b/NEWS
@@ -86,6 +86,14 @@ The ‘system*’ and ‘system’ replacements in the ‘shepherd’ process no
 given command in the current directory rather than under
 (default-service-directory).
 
+** Refuse to start when another shepherd is listening on the socket
+   (<https://issues.guix.gnu.org/76998>)
+
+Starting an additional ‘shepherd’ as a user used to lead it to take control of
+the socket (by default /run/user/UID/shepherd/socket) even though another
+instance was already running and listening to that socket.  Since that
+behavior is undesirable, ‘shepherd’ now refuses to start in this situation.
+
 * Changes in 1.0.3
 
 ** ‘spawn-command’ now honors #:log-file
diff --git a/modules/shepherd.scm b/modules/shepherd.scm
index a1bfebb..bf58199 100644
--- a/modules/shepherd.scm
+++ b/modules/shepherd.scm
@@ -55,7 +55,16 @@
 socket file at FILE-NAME upon exit of PROC.  Return the values of PROC."
   (let ((sock (catch 'system-error
                 (lambda ()
-                  (open-server-socket file-name))
+                  (or (open-server-socket file-name
+                                          #:false-if-in-use?
+                                          (not (= 1 (getpid))))
+                      (begin
+                        ;; Refuse to start when another shepherd is already
+                        ;; listening on FILE-NAME.
+                        (report-error (l10n "shepherd instance already \
+listening on '~a'")
+                                      file-name)
+                        (exit 1))))
                 (lambda args
                   (match args
                     ((key proc . _)
diff --git a/modules/shepherd/comm.scm b/modules/shepherd/comm.scm
index 8c1050e..6e65716 100644
--- a/modules/shepherd/comm.scm
+++ b/modules/shepherd/comm.scm
@@ -105,17 +105,39 @@ return the socket."
                  (list errno) rest)))
       sock)))
 
-(define (open-server-socket file-name)
-  "Open a socket at FILE-NAME, and listen for connections there."
+(define* (open-server-socket file-name #:key false-if-in-use?)
+  "Open a socket at @var{file-name}, and listen for connections there.  When
+@var{false-if-in-use?} is true, attempt to connect to @var{file-name} first
+and return #f if the connection was successfully established--meaning a server
+is already listening on @var{file-name}."
   (with-fluids ((%default-port-encoding "UTF-8"))
     (let ((sock    (socket PF_UNIX
                            (logior SOCK_STREAM SOCK_NONBLOCK SOCK_CLOEXEC)
                            0))
           (address (make-socket-address AF_UNIX file-name)))
-      (catch-system-error (delete-file file-name))
-      (bind sock address)
-      (listen sock 10)
-      sock)))
+      (and (or (not false-if-in-use?)
+               ;; Try to connect to ADDRESS and return #f if that works.
+               ;; Since SOCK is non-blocking, retry a few times.
+               (catch 'system-error
+                 (lambda ()
+                   (let loop ((i 0))
+                     (unless (or (connect sock address) ;non-blocking
+                                 (> i 5))
+                       (sleep 1)
+                       (loop (+ 1 i))))
+                   (close-port sock)
+                   #f)
+                 (lambda args
+                   (or (memv (system-error-errno args)
+                             (list ENOENT ECONNREFUSED))
+                       (begin
+                         (close-port sock)
+                         #t)))))
+           (begin
+             (catch-system-error (delete-file file-name))
+             (bind sock address)
+             (listen sock 10)
+             sock)))))
 
 (define (read-command port)
   "Receive a command from PORT; return the command of #f if something went
diff --git a/tests/basic.sh b/tests/basic.sh
index 2214e78..975f7e3 100644
--- a/tests/basic.sh
+++ b/tests/basic.sh
@@ -86,6 +86,10 @@ shepherd_pid="`cat $pid`"
 
 kill -0 $shepherd_pid
 test -S "$socket"
+
+# Attempt to reuse $socket should fail.
+shepherd -I -s "$socket" -c "$conf" && false
+
 pristine_status=`$herd status -n 0` # Prep for 'reload' test.
 echo $pristine_status | grep -E '(Start.*root|Stop.*test)'
 

Reply via email to