Hi,

Because of my limited hardware, I implemented a mechanism that
automatically detects X server deadlocks and recovers from them, as long
as the kernel itself has not locked up. It consists of a C programme which
detects the lockup and a shell script which starts the X server and uses
kbd_mode and savetextmode/textmode to make the system usable again after
a lockup. This means the script must run with superuser privileges.
However, it will try to make X clients run with a normal UID taken from
$LOGNAME or $SUDO_USER.

The lockup detection uses two processes communicating through a pipe.
One of them performs dummy X operations and sends a heart beat through
the pipe. It will freeze when the X server locks. The other process
receives the heart beats and detects timeouts. It indicates a lockup in
the return value which is used in the shell script.

The script and the C programme are attached. The script is currently
configured for my setup. Changing it is just a matter of changing 5
variables.

Regards,
   Felix

-- 
               __\|/__    ___     ___     ___
__Tschüß_______\_6 6_/___/__ \___/__ \___/___\___You can do anything,___
_____Felix_______\Ä/\ \_____\ \_____\ \______U___just not everything____
  [EMAIL PROTECTED]    >o<__/   \___/   \___/        at the same time!
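/* compile with:
 *   gcc -o xpoll xpoll.c -lX11
 * (the library must follow the source file so the linker can resolve
 *  the Xlib symbols it references)
 */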
#include <X11/Xlib.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

/* messages sent through the pipe: single bytes written by the polling
 * child (poll_x) and read by the monitoring parent (monitor_child) */
/* the child gave up trying to open the display */
#define NO_SERVER   'n'
/* the child got an answer from the X server */
#define HEART_BEAT  'h'

/* return codes: values returned from main; monitor_child returns a subset
 * of them */
/* heart beats stopped and the child has terminated */
#define NORMAL_EXIT 0
/* heart beats stopped but the child is still running */
#define LOCKUP      1
/* the child reported NO_SERVER */
#define NO_CONNECT  2
/* a system call failed or an unexpected byte arrived on the pipe */
#define OTHER_ERR   3

/* constants */
/* number of retries of XOpenDisplay in the child; also the number of extra
 * check intervals the parent grants before the first heart beat */
#define MAX_TRY_OPEN   10
/* seconds the child sleeps between two heart beats */
#define BEAT_INTERVAL  2
/* seconds slept between two checks of the pipe in the parent and between
 * two attempts to open the display in the child */
#define CHECK_INTERVAL 1
/* number of check intervals without a heart beat after which the parent
 * stops monitoring */
#define TIMEOUT        4

/* file descriptors of the pipeline: [0] is the reading end kept by the
 * parent, [1] is the writing end kept by the child */
int stethoscope[2];

/*
 * Child side of the watchdog.
 *
 * Connects to the default display, retrying once per CHECK_INTERVAL, and
 * then issues a harmless request to the X server every BEAT_INTERVAL
 * seconds.  After every request that comes back, a HEART_BEAT byte is
 * written to the pipe.  If no connection can be established, a single
 * NO_SERVER byte is written instead, the writing end is closed and the
 * function returns.  Once connected, the function does not return on its
 * own.
 */
void poll_x () {
    Display *dpy = NULL;
    Window root;
    XWindowAttributes wa;
    char msg;
    int attempt = 0;

    /* keep trying to reach the X server, pausing between failed attempts */
    while (attempt <= MAX_TRY_OPEN) {
        dpy = XOpenDisplay (NULL);
        if (dpy != NULL)
            break;
        sleep (CHECK_INTERVAL);
        ++attempt;
    }

    if (dpy == NULL) {
        /* tell the parent that there is no server to talk to */
        msg = NO_SERVER;
        write (stethoscope[1], &msg, 1);
        close (stethoscope[1]);
        return;
    }

    root = XDefaultRootWindow (dpy);
    for (;;) {
        /* querying the root window's attributes creates some activity
         * without actually drawing anything */
        XGetWindowAttributes (dpy, root, &wa);
        /* the call came back, so X is not locked: send a heart beat */
        msg = HEART_BEAT;
        write (stethoscope[1], &msg, 1);
        sleep (BEAT_INTERVAL);
    }
    /* not reached */
}

/*
 * Parent side of the watchdog.
 *
 * Polls the non-blocking reading end of the pipe once per CHECK_INTERVAL
 * and counts the consecutive polls that delivered nothing.
 *
 * Returns:
 *   NO_CONNECT   the child reported NO_SERVER
 *   OTHER_ERR    read failed (other than EAGAIN) or an unknown byte arrived
 *   NORMAL_EXIT  the idle counter reached TIMEOUT; the caller still has to
 *                find out whether the child died (X server exit) or is
 *                stuck (lockup)
 */
int monitor_child () {
    /* start below zero to give the X server extra time to come up */
    int idle = -MAX_TRY_OPEN;

    for (;;) {
        char msg;
        ssize_t got = read (stethoscope[0], &msg, 1);

        if (got > 0) {
            /* a message arrived: classify it */
            if (msg == NO_SERVER)
                return NO_CONNECT;
            if (msg != HEART_BEAT) {
                fprintf (stderr, "unexpected message\n");
                return OTHER_ERR;
            }
            idle = 0;
            continue;
        }

        if (got == -1 && errno != EAGAIN) {
            perror ("read");
            return OTHER_ERR;
        }

        /* end of file or nothing available yet: one more silent interval */
        if (idle++ == TIMEOUT)
            return NORMAL_EXIT; /* timeout expired */
        sleep (CHECK_INTERVAL);
    }
}

/*
 * Sets up the pipe, forks the polling child and monitors it.
 *
 * Exit status:
 *   NORMAL_EXIT  heart beats stopped and the child has terminated
 *   LOCKUP       heart beats stopped but the child is still running
 *   NO_CONNECT   the child could not connect to an X server
 *   OTHER_ERR    a system call failed or the pipe carried unexpected data
 */
int main () {
    pid_t pid;
    int flags, r;

  /* make a pipe */
    if (pipe (stethoscope) == -1) {
	perror ("pipe");
	return OTHER_ERR;
    }
  /* make the reading end non-blocking so that the monitor can poll it */
    if ((flags = fcntl (stethoscope[0], F_GETFL, 0)) == -1) {
	perror ("fcntl");
	return OTHER_ERR;
    }
    if (fcntl (stethoscope[0], F_SETFL, flags | O_NONBLOCK) == -1) {
	perror ("fcntl");
	return OTHER_ERR;
    }

  /* fork a child process */
    pid = fork ();
    if (pid < 0) {
	perror ("fork");
	return OTHER_ERR;
    }
    if (pid == 0) {
	close (stethoscope[0]);
      /* child will poll the x-server */
	poll_x ();
	return 0;
    }

    close (stethoscope[1]);
  /* parent will monitor the child */
    r = monitor_child ();

    if (r == NORMAL_EXIT) {
      /* if child completed then it's really a normal exit
       * otherwise it's a lockup */
	pid_t rpid = waitpid (pid, NULL, WNOHANG);
	if (rpid == -1) {
	    perror ("waitpid");
	    return OTHER_ERR;
	} else if (rpid == 0) {
	    fprintf (stderr, "X server locked up\n");
	  /* ok, lockup */
	    r = LOCKUP;
	} else
	    fprintf (stderr, "X server exited normally\n");
    } else {
      /* Only after NO_CONNECT is the child known to be terminating by
       * itself.  In every other error case it may still be inside its
       * endless polling loop, so a blocking wait would never return:
       * terminate it first. */
	if (r != NO_CONNECT && kill (pid, SIGKILL) == -1) {
	    perror ("kill");
	    return OTHER_ERR;
	}
      /* wait for child to complete */
	if (waitpid (pid, NULL, 0) == -1) {
	    perror ("waitpid");
	    return OTHER_ERR;
	}
	if (r == NO_CONNECT)
	    fprintf (stderr, "Could not connect to an X server\n");
    }

    return r;
}

Attachment: glserver
Description: application/shellscript

Reply via email to