Re: [Libguestfs] [PATCH libnbd v2 2/2] api: Implement local command with systemd socket activation.

Eric Blake Tue, 01 Oct 2019 06:27:16 -0700

On 9/30/19 11:32 AM, Richard W.M. Jones wrote:

This adds new APIs for running a local NBD server and connecting to it
using systemd socket activation (instead of stdin/stdout).


This includes interop tests against nbdkit and qemu-nbd which I
believe are the only NBD servers supporting socket activation.  (If we
find others then we can add more interop tests in future.)

The upstream spec for systemd socket activation is here:
http://0pointer.de/blog/projects/socket-activation.html
---

+++ b/generator/states-connect-socket-activation.c

+/* This is baked into the systemd socket activation API. */
+#define FIRST_SOCKET_ACTIVATION_FD 3
+
+/* Prepare environment for calling execvpe when doing systemd socket
+ * activation.  Takes the current environment and copies it.  Removes
+ * any existing LISTEN_PID or LISTEN_FDS and replaces them with new
+ * variables.  env[0] is "LISTEN_PID=..." which is filled in by
+ * CONNECT_SA.START, and env[1] is "LISTEN_FDS=1".
+ */

I know that getenv()/setenv()/putenv() tend to prefer sorted environ, but I also think that exec HAS to handle a hand-built environ that is not sorted, so you should be okay with this shortcut.

+static char **
+prepare_socket_activation_environment (void)
+{
+  char **env = NULL;
+  char *p0 = NULL, *p1 = NULL;
+  size_t i, len;
+  void *vp;
+
+  p0 = strdup ("LISTEN_PID=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
+  if (p0 == NULL)
+    goto err;
+  p1 = strdup ("LISTEN_FDS=1");
+  if (p1 == NULL)
+    goto err;
+
+  /* Copy the current environment. */
+  env = nbd_internal_copy_string_list (environ);

POSIX says the external symbol 'environ' has to exist for linking purposes, but also states that no standard header is required to declare it. You may want to add an 'extern char **environ;' line before this function for portability. On the other hand, gnulib documents that on newer Mac OS, even that doesn't work, where the solution is '#define environ (*_NSGetEnviron())'. I guess we'll deal with it when somebody actually reports compilation failure.

+
+  /* Remove any existing LISTEN_PID or LISTEN_FDS instances. */
+  for (i = 2; env[i] != NULL; ++i) {
+    if (strncmp (env[i], "LISTEN_PID=", 11) == 0 ||
+        strncmp (env[i], "LISTEN_FDS=", 11) == 0) {
+      memmove (&env[i], &env[i+1],
+               sizeof (char *) * (nbd_internal_string_list_length (&env[i])));
+      i--;
+    }
+  }

Lots of O(N) traversals of the list, but this probably isn't our hotspot, and so probably not worth optimizing further.

+STATE_MACHINE {
+ CONNECT_SA.START:
+#ifdef HAVE_EXECVPE
+  size_t len;
+  int s;
+  struct sockaddr_un addr;
+  char **env;
+  pid_t pid;
+  int flags;
+
+  assert (!h->sock);
+  assert (h->argv);
+  assert (h->argv[0]);
+
+  /* Use /tmp instead of TMPDIR because we must ensure the path is
+   * short enough to store in the sockaddr_un.  On some platforms this
+   * may cause problems so we may need to revisit it.  XXX
+   */
+  h->sa_tmpdir = strdup ("/tmp/libnbdXXXXXX");
+  if (h->sa_tmpdir == NULL) {
+    SET_NEXT_STATE (%.DEAD);
+    set_error (errno, "strdup");
+    return 0;
+  }
+  if (mkdtemp (h->sa_tmpdir) == NULL) {
+    SET_NEXT_STATE (%.DEAD);
+    set_error (errno, "mkdtemp");
+    /* Avoid cleanup in nbd_close. */
+    free (h->sa_tmpdir);
+    h->sa_tmpdir = NULL;
+    return 0;
+  }
+
+  h->sa_sockpath = strdup ("/tmp/libnbdXXXXXX/sock");
+  if (h->sa_sockpath == NULL) {
+    SET_NEXT_STATE (%.DEAD);
+    set_error (errno, "strdup");
+    return 0;
+  }
+
+  len = strlen (h->sa_tmpdir);
+  memcpy (h->sa_sockpath, h->sa_tmpdir, len);


Is it worth using:

asprintf (&h->sa_sockpath, "%s/sock", h->sa_tmpdir);

for less code? asprintf might not be standard, but we already require execvpe, which probably means asprintf is available. But your open-coded variant works, too.

+
+  s = socket (AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
+  if (s == -1) {
+    SET_NEXT_STATE (%.DEAD);
+    set_error (errno, "socket");
+    return 0;
+  }


I guess the child process can add O_NONBLOCK if they want it.

+
+  addr.sun_family = AF_UNIX;
+  memcpy (addr.sun_path, h->sa_sockpath, strlen (h->sa_sockpath) + 1);
+  if (bind (s, (struct sockaddr *) &addr, sizeof addr) == -1) {
+    SET_NEXT_STATE (%.DEAD);
+    set_error (errno, "bind: %s", h->sa_sockpath);
+    close (s);
+    return 0;
+  }
+
+  if (listen (s, 1) == -1) {
+    SET_NEXT_STATE (%.DEAD);
+    set_error (errno, "listen");
+    close (s);
+    return 0;
+  }
+
+  env = prepare_socket_activation_environment ();
+  if (!env) {
+    SET_NEXT_STATE (%.DEAD);
+    close (s);
+    return 0;
+  }
+
+  pid = fork ();
+  if (pid == -1) {
+    SET_NEXT_STATE (%.DEAD);
+    set_error (errno, "fork");
+    close (s);
+    nbd_internal_free_string_list (env);
+    return 0;
+  }
+  if (pid == 0) {         /* child - run command */
+    if (s != FIRST_SOCKET_ACTIVATION_FD) {
+      dup2 (s, FIRST_SOCKET_ACTIVATION_FD);
+      close (s);
+    }
+    else {
+      /* We must unset CLOEXEC on the fd.  (dup2 above does this
+       * implicitly because CLOEXEC is set on the fd, not on the
+       * socket).
+       */
+      flags = fcntl (s, F_GETFD, 0);
+      if (flags == -1) {
+        nbd_internal_fork_safe_perror ("fcntl: F_GETFD");
+        _exit (126);
+      }
+      if (fcntl (s, F_SETFD, flags & ~FD_CLOEXEC) == -1) {
+        nbd_internal_fork_safe_perror ("fcntl: F_SETFD");
+        _exit (126);
+      }
+    }
+


Looks correct.

+    char buf[32];
+    const char *v =
+      nbd_internal_fork_safe_itoa ((long) getpid (), buf, sizeof buf);
+    strcpy (&env[0][11], v);

We're using the magic '11' in several places, maybe it's worth a #define to make it obvious it is strlen("LISTEN_PID=") ?

+
+    /* Restore SIGPIPE back to SIG_DFL. */
+    signal (SIGPIPE, SIG_DFL);
+
+    execvpe (h->argv[0], h->argv, env);
+    nbd_internal_fork_safe_perror (h->argv[0]);
+    if (errno == ENOENT)
+      _exit (127);
+    else
+      _exit (126);
+  }
+
+  /* Parent. */
+  close (s);
+  nbd_internal_free_string_list (env);
+  h->pid = pid;
+
+  h->connaddrlen = sizeof addr;
+  memcpy (&h->connaddr, &addr, h->connaddrlen);
+  SET_NEXT_STATE (%^CONNECT.START);
+  return 0;
+
+#else /* !HAVE_EXECVPE */
+  SET_NEXT_STATE (%.DEAD)
+  set_error (ENOTSUP, "platform does not support socket activation");
+  return 0;
+#endif

We probably ought to add a matching nbd_supports_socket_activation() feature function.

Or, it would be possible to create a fallback for execvpe() on platforms that lack it by using execlpe() and our own path-walker utility function. Can be done as a followup patch. If we do that, then the mere presence of LIBNBD_HAVE_NBD_CONNECT_SA is witness enough of the functionality, rather than needing a runtime probe.

+++ b/lib/connect.c

+
+int
+nbd_unlocked_aio_connect_socket_activation (struct nbd_handle *h, char **argv)
+{
+  char **copy;
+
+  copy = nbd_internal_copy_string_list (argv);
+  if (!copy) {
+    set_error (errno, "copy_string_list");
+    return -1;
+  }
+
+  if (h->argv)
+    nbd_internal_free_string_list (h->argv);


How can h->argv ever be previously set?

+  h->argv = copy;
+
+  return nbd_internal_run (h, cmd_connect_sa);
+}
diff --git a/lib/handle.c b/lib/handle.c
index 2af25fe..a7f2c79 100644
--- a/lib/handle.c
+++ b/lib/handle.c
@@ -129,6 +129,16 @@ nbd_close (struct nbd_handle *h)
    free_cmd_list (h->cmds_in_flight);
    free_cmd_list (h->cmds_done);
    nbd_internal_free_string_list (h->argv);
+  if (h->sa_sockpath) {
+    if (h->pid > 0)
+      kill (h->pid, SIGTERM);
+    unlink (h->sa_sockpath);
+    free (h->sa_sockpath);
+  }
+  if (h->sa_tmpdir) {
+    rmdir (h->sa_tmpdir);
+    free (h->sa_tmpdir);
+  }
    free (h->unixsocket);
    free (h->hostname);
    free (h->port);

Somewhat pre-existing: we have a waitpid() here (good, so we don't hang on to a zombie process), but we are relying on the child process to gracefully go away (whether for connect_command when stdin closes, or for connect_sa on receipt of SIGTERM). Do we need a retry loop that escalates to SIGKILL if the child process does not quickly respond to the initial condition? On the other hand, the fact that our waitpid() blocks until the child changes status means that if a child ever wedges, the fact that we wedge too gives some visibility to the client that it's not libnbd's fault and that they need to get the bug fixed in their child process.

I think it is ready to push. We may still need further tweaks, but that's often the case.


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3226
Virtualization:  qemu.org | libvirt.org

_______________________________________________
Libguestfs mailing list
[email protected]
https://www.redhat.com/mailman/listinfo/libguestfs

Re: [Libguestfs] [PATCH libnbd v2 2/2] api: Implement local command with systemd socket activation.

Reply via email to