On Monday 19 of March 2012, Michael Meeks wrote:
> On Mon, 2012-03-19 at 07:33 +0100, Lubos Lunak wrote:
> >  Oh, I see. I've already noticed this myself, and that's a good
> > explanation for Voreppe's (lack of) builds. That's a rather bad bug for
> > tinderbox builds, and we really could use a tinderbox watching over our
> > commits breaking the MSVC build (I think I fixed 5 MSVC regressions last
> > week at the very least).
>
>       Yep - perhaps a re-boot-box-and-restart-build-after-4-hours of no
> watchdog ping or something ? :-)

 Nah, so crude :). I've written a make watchdog, it's currently being tested 
on the Win-x86_6-fast tinderbox to see how it works in practice.

>       Anyhow - glad to see you Windows-ised :-)

 Did anyone say I was staying 8-O ?

-- 
 Lubos Lunak
 l.lu...@suse.cz
# Makefile for the make watchdog: builds the tool and provides a self-test.

# Number of seconds the fake build steps below sleep before producing output.
# It is larger than the watchdog timeout used by the "test" target, so the
# watchdog is expected to time out.
TIMEOUT=1000

CXX=g++
CXXFLAGS=-O2

.PHONY: clean test_clean all test perform_test

# Builds the watchdog binary (always rebuilt, the target is phony).
all:
	$(CXX) -Wall $(CXXFLAGS) makewatchdog.cpp -o makewatchdog

# Runs the watchdog on a nested parallel make: output goes to the file "log",
# the inactivity timeout is 20 seconds and at most 2 attempts are made.
test: all test_clean
	./makewatchdog log 20 2 $(MAKE) -j2 perform_test

# The build that is run under the watchdog; its two branches can run in parallel.
perform_test: first second

first: output
	cp output first

second: output2
	cp output2 second

# Simulated long-running build steps that stay silent for $(TIMEOUT) seconds.
output:
	( sleep $(TIMEOUT); echo kuk ) >output

output2:
	( sleep $(TIMEOUT); echo kuk ) >output2

# Removes everything the test creates.
test_clean:
	rm -f first second output output2 log

# Removes test files and the built binary.
clean: test_clean
	rm -f makewatchdog
/*

  Copyright (c) 2012 Lubos Lunak <l.lu...@suse.cz>

  Permission is hereby granted, free of charge, to any person obtaining a
  copy of this software and associated documentation files (the "Software"),
  to deal in the Software without restriction, including without limitation
  the rights to use, copy, modify, merge, publish, distribute, sublicense,
  and/or sell copies of the Software, and to permit persons to whom the
  Software is furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.

*/

#include <algorithm>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <string.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
#include <vector>

//#define DEBUG

using namespace std;

// Prints a one-line summary of the expected command line to stdout.
// argv0 is the program name to show; always returns 0.
static int usage( const char* argv0 )
    {
    const int result = 0;
    fprintf( stdout, "Usage: %s [outputFile] [timeout] [maxAttempts] [make command...]\n", argv0 );
    return result;
    }

// Exit code used when the watchdog itself fails (e.g. pipe(), fork() or opening the
// output file fails) or when the status of the make process cannot be interpreted.
const int FAILURE = 3; // do not use 1 or 2 (check exit values make uses)
// Prefix put in front of the watchdog's own messages, so that they can be told
// apart from the output of the make command that ends up in the same file.
#define NAME "Make watchdog: "

// Description of one process found in /proc, filled in by findAllProcesses().
struct ProcInfo
    {
    pid_t pid; // process id, taken from the name of the /proc/<pid> directory
    pid_t parent; // parent process id, parsed from /proc/<pid>/stat
    string cmdline; // start of /proc/<pid>/cmdline (may be empty), printed by killMake()
    };

// A list of processes, e.g. all running processes or the set of processes to kill.
typedef vector< ProcInfo > ProcInfoList;

// Collects information about all processes currently listed in /proc.
//
// For every numeric /proc entry the parent pid is parsed from /proc/<pid>/stat and
// the beginning of /proc/<pid>/cmdline (up to the first NUL byte, i.e. normally
// argv[0]) is stored as cmdline. Entries whose stat file cannot be opened, read
// or parsed, whose parent pid is 0, or which claim to be their own parent are
// skipped. An unreadable cmdline is not an error, such a process is listed with
// an empty cmdline.
// Returns an empty list (after printing a message) if /proc cannot be opened.
static ProcInfoList findAllProcesses()
    {
    ProcInfoList procInfos;
    DIR* dir = opendir( "/proc/" );
    if( dir == NULL )
        {
        fprintf( stderr, NAME "Cannot read /proc.\n" );
        return procInfos;
        }
    while( dirent* entry = readdir( dir ))
        {
        char buf[ 16384 ];
        ProcInfo procInfo;
        procInfo.pid = atoi( entry->d_name );
        if( procInfo.pid == 0 )
            continue; // not a process directory
        const string procDir = string( "/proc/" ) + entry->d_name;
        FILE* statFile = fopen(( procDir + "/stat" ).c_str(), "r" );
        if( statFile == NULL )
            {
#ifdef DEBUG
            fprintf( stderr, "Cannot open stat for %s\n", entry->d_name );
#endif
            continue;
            }
        size_t size = fread( buf, 1, sizeof( buf ) - 1, statFile );
        const bool readFailed = ferror( statFile ) != 0;
        fclose( statFile );
        if( readFailed )
            {
#ifdef DEBUG
            fprintf( stderr, "Cannot read stat for %s\n", entry->d_name );
#endif
            continue;
            }
        buf[ size ] = '\0';
        // The line looks like "pid (comm) state ppid ...". The command name may
        // itself contain parentheses, so the last ')' is the one to look for.
        int parent = 0; // read into a plain int, pid_t need not match %d
        if( const char* lparen = strchr( buf, '(' ))
            if( const char* rparen = strrchr( lparen, ')' ))
                if( rparen[ 1 ] != '\0' ) // never start scanning past the terminator
                    sscanf( rparen + 2, "%*c %d", &parent );
        procInfo.parent = parent;
        if( procInfo.parent == 0 )
            continue;
        if( procInfo.pid == procInfo.parent )
            continue; // just in case
        if( FILE* cmdlineFile = fopen(( procDir + "/cmdline" ).c_str(), "r" ))
            {
            // Bounded read: a command line can be far longer than buf, so an
            // unbounded "%s" conversion would overflow it.
            size = fread( buf, 1, sizeof( buf ) - 1, cmdlineFile );
            fclose( cmdlineFile );
            buf[ size ] = '\0';
            // arguments are NUL-separated, so this keeps the text up to the first NUL
            procInfo.cmdline = buf;
            }
        else
            { // not an error
#ifdef DEBUG
            fprintf( stderr, "Cannot read cmdline for %s\n", entry->d_name );
#endif
            }
        // ok
        procInfos.push_back( procInfo );
        }
    closedir( dir );
    return procInfos;
    }

static void findToKillRecursive( pid_t parent, const ProcInfoList& allProcesses, ProcInfoList* toKill )
    {
    for( unsigned int i = 0;
         i < allProcesses.size();
         ++i )
        if( allProcesses[ i ].parent == parent )
            {
            findToKillRecursive( allProcesses[ i ].pid, allProcesses, toKill );
            toKill->push_back( allProcesses[ i ] );
            }
    }

// Builds the list of processes to kill for the process tree rooted at topParent:
// all its descendants (deepest first, see findToKillRecursive()) followed by
// topParent itself, if it was found among the running processes.
static vector< ProcInfo > findToKill( pid_t topParent )
    {
    const ProcInfoList running = findAllProcesses();
    ProcInfoList victims;
    findToKillRecursive( topParent, running, &victims );
    // the top parent goes last, look up its entry
    ProcInfoList::const_iterator top = running.begin();
    while( top != running.end() && top->pid != topParent )
        ++top;
    if( top != running.end())
        victims.push_back( *top );
#ifdef DEBUG
    else
        fprintf( stderr, "Top parent process info not found.\n" );
#endif
    return victims;
    }

// Converts a wait status as filled in by waitpid() to an exit code:
// the exit status of a normally exited process, 128 + signal number for a
// process terminated by a signal, and FAILURE for anything else.
static int makeExitCode( int status )
    {
    int code = FAILURE;
    if( WIFEXITED( status ))
        code = WEXITSTATUS( status );
    else if( WIFSIGNALED( status ))
        code = 128 + WTERMSIG( status );
    return code;
    }

// How one run of the make command ended. main() starts every attempt with
// SuccessfullExit, killMake() changes the value when a timeout forces it to act.
enum KillStatus
    {
    SuccessfullExit, // make was not killed by the watchdog, it exited on its own
    KilledInterrupted, // timed out and was interrupted with SIGINT; main() may retry
    KilledForced       // timed out and a process needed SIGKILL (unclear cleanup); main() gives up
    };

static int killMake( pid_t pid, KillStatus* killed )
    {
#ifdef DEBUG
    fprintf( stderr, "Going to kill pid %d.\n", pid );
#endif
    ProcInfoList toKill = findToKill( pid );
    // SIGINT first
    for( unsigned i = 0;
         i < toKill.size();
         ++i )
        kill( toKill[ i ].pid, SIGINT );
    time_t t = time( NULL );
    while( t + 10 > time( NULL ))
        sleep( 2 ); // may get interrupted by a signal
    int status;
    bool pidHasFinished = false;
    // need to clean up the top parent
    if( waitpid( pid, &status, WNOHANG ) >= 0 )
        {
        pidHasFinished = true;
        *killed = KilledInterrupted;
        }
    // now forcibly
    for( unsigned i = 0;
         i < toKill.size();
         ++i )
        {
        if( kill( toKill[ i ].pid, 0 ) == 0 ) // still alive?
            {
            *killed = KilledForced; // unclear cleanup
            fprintf( stderr, NAME "Process %d not interrupted, forcibly killing.\n", toKill[ i ].pid );
            fprintf( stderr, NAME "Cmdline: %s\n", toKill[ i ].cmdline.c_str());
            kill( toKill[ i ].pid, SIGKILL );
            }
        }
    if( !pidHasFinished )
        waitpid( pid, &status, 0 );
    return makeExitCode( status );
    }

// Switches fd to non-blocking mode and marks it close-on-exec.
// Returns false (after printing the reason) if any fcntl() call fails.
bool makeNonBlocking( int fd )
    {
    int options = fcntl( fd, F_GETFL );
    if( options < 0 )
        {
        perror( NAME "fcntl( F_GETFL )" );
        return false;
        }
    // add O_NONBLOCK to the current status flags instead of replacing them
    if( fcntl( fd, F_SETFL, options | O_NONBLOCK ) < 0 )
        {
        perror( NAME "fcntl( F_SETFL )" );
        return false;
        }
    // Close-on-exec is a descriptor flag, not a file status flag: it has to be
    // set using F_SETFD/FD_CLOEXEC, passing O_CLOEXEC to F_SETFL has no effect.
    int fdFlags = fcntl( fd, F_GETFD );
    if( fdFlags < 0 )
        {
        perror( NAME "fcntl( F_GETFD )" );
        return false;
        }
    if( fcntl( fd, F_SETFD, fdFlags | FD_CLOEXEC ) < 0 )
        {
        perror( NAME "fcntl( F_SETFD )" );
        return false;
        }
    return true;
    }

// Write end of the pipe through which childHandler() reports SIGCHLD;
// set up by watchMake() before the handler is installed.
static int childPipeWrite;

// SIGCHLD handler: writes a single byte to childPipeWrite so that the main
// loop notices the exit of the child. Only async-signal-safe calls are used
// and errno is preserved, as the interrupted code may be about to check it.
static void childHandler( int signum )
    {
    (void) signum;
    const int savedErrno = errno;
    char c = '\0';
    // best effort, nothing can be done about a failure from inside the handler
    ssize_t written = write( childPipeWrite, &c, 1 );
    (void) written;
    errno = savedErrno;
    }

// Watches the running make process until it exits or its output stalls.
//
// pid     - the make process, a child of this process
// killed  - updated by killMake() if the process has to be killed after a timeout
// timeout - maximum number of seconds without new data in the output file
// watchFd - descriptor open for reading the output file, used to detect activity
//
// Returns the exit code of make (see makeExitCode()), the result of killMake()
// after a timeout, or FAILURE if the process could not be watched or waited for.
static int watchMake( pid_t pid, KillStatus* killed, int timeout, int watchFd )
    {
    int pipeFd[ 2 ];
    if( pipe( pipeFd ) < 0 )
        {
        perror( NAME "pipe()" );
        return FAILURE;
        }
    childPipeWrite = pipeFd[ 1 ];
    int childPipeRead = pipeFd[ 0 ];
    if( !makeNonBlocking( childPipeRead ))
        {
        close( childPipeRead );
        close( childPipeWrite );
        return FAILURE;
        }
    struct sigaction act;
    act.sa_handler = childHandler;
    sigemptyset( &act.sa_mask );
    act.sa_flags = SA_NOCLDSTOP;
#ifdef SA_RESTART
    act.sa_flags |= SA_RESTART;
#endif
    sigaction( SIGCHLD, &act, NULL );
    time_t lastActivity = time( NULL );
    int result = FAILURE;
    bool timedOut = false;
    for(;;)
        {
        // select() cannot be used to watch for more data in watchFd, because it is a file
        // and our reading position will be at its end for most of the time, meaning that
        // select() will signal the fd is ready to read (i.e. eof)
#ifdef DEBUG
        sleep( 2 );
#else
        sleep( 10 );
#endif
        char buf[ 1024 ];
        int status = 0;
        pid_t waited = 0;
        if( read( childPipeRead, buf, 1 ) > 0 )
            { // the SIGCHLD handler has reported the exit of the child
            do
                waited = waitpid( pid, &status, 0 );
            while( waited < 0 && errno == EINTR );
            }
        else
            {
            // The child may have exited before the handler was installed, in
            // which case no SIGCHLD will ever arrive. Poll for that case, otherwise
            // a finished make would be reported as timed out.
            waited = waitpid( pid, &status, WNOHANG );
            }
        if( waited != 0 )
            {
#ifdef DEBUG
            fprintf( stderr, "Child exited\n" );
#endif
            if( waited == pid )
                result = makeExitCode( status );
            else
                perror( NAME "waitpid()" ); // status is not valid, result stays FAILURE
            break;
            }
        if( read( watchFd, buf, sizeof( buf )) > 0 )
            {
#ifdef DEBUG
            fprintf( stderr, "Activity in output file.\n" );
#endif
            // skip to the end of the file, only the fact that it has grown matters
            while( read( watchFd, buf, sizeof( buf )) > 0 )
                ;
            lastActivity = time( NULL );
            }
        else if( lastActivity + timeout < time( NULL ))
            { // timeout
#ifdef DEBUG
            fprintf( stderr, "Activity timeout.\n" );
#endif
            timedOut = true;
            break;
            }
        }
    // Uninstall the handler before closing the pipe it writes to; the pipe is
    // created anew for every attempt and would leak otherwise.
    signal( SIGCHLD, SIG_DFL );
    close( childPipeRead );
    close( childPipeWrite );
    if( timedOut )
        return killMake( pid, killed );
    return result;
    }

static int runMake( int argc, char** argv, KillStatus* killed, int timeout, int watchFd )
    {
    pid_t pid = fork();
    switch( pid )
        {
        default: // parent
            return watchMake( pid, killed, timeout, watchFd );
        case 0: // child
            close( watchFd );
            execvp( argv[ 0 ], argv );
            break;
        case -1: // failure
            perror( NAME "fork()" );
            break;
        }
    fprintf( stderr, NAME "Make command invocation failed.\n" );
    return FAILURE;
    }

// Redirects stdout and stderr of this process (and thus of the make command
// started later) to the given file, which is created or truncated, and opens
// the file a second time for reading in order to watch it for activity.
// Returns the descriptor for reading, or -1 on failure (a message is printed).
static int setupOutputFile( const char* outputFileName )
    {
    int outputFd = open( outputFileName, O_APPEND | O_CREAT | O_TRUNC | O_WRONLY, 0644 );
    if( outputFd < 0 )
        {
        fprintf( stderr, NAME "Opening output file %s for writing failed: %s\n", outputFileName, strerror( errno ));
        return -1;
        }
    if( dup2( outputFd, STDOUT_FILENO ) < 0 || dup2( outputFd, STDERR_FILENO ) < 0 )
        {
        perror( NAME "dup2()" ); // report first, close() may change errno
        close( outputFd );
        return -1;
        }
    // If stdout or stderr was closed on startup, open() may have returned that
    // very descriptor; closing it would undo the redirection.
    if( outputFd != STDOUT_FILENO && outputFd != STDERR_FILENO )
        close( outputFd );
    int watchFd = open( outputFileName, O_RDONLY );
    if( watchFd < 0 )
        {
        perror( NAME "open()" );
        return -1;
        }
    if( !makeNonBlocking( watchFd ))
        {
        close( watchFd );
        return -1;
        }
    return watchFd;
    }

// Runs a make command with its output redirected to a file and restarts it
// if it produces no output for too long.
//
// Arguments: outputFile timeout maxAttempts make-command...
// The command is retried only after a timeout that could be handled by
// interrupting it. If make exits on its own (successfully or not), or if it
// had to be killed forcibly, no further attempt is made.
// Returns the exit code of the last make run, or FAILURE for invalid arguments
// or if the output file cannot be set up.
int main( int argc, char** argv )
    {
    if( argc < 5 ) // at least one word of the make command is required
        {
        usage( argv[ 0 ] );
        return FAILURE;
        }
    const char* outputFileName = argv[ 1 ];
    int timeout = atoi( argv[ 2 ] );
    int attempts = atoi( argv[ 3 ] );
    // atoi() returns 0 for anything that is not a number
    if( timeout <= 0 || attempts <= 0 )
        {
        fprintf( stderr, NAME "Timeout and maximum number of attempts must be positive numbers.\n" );
        usage( argv[ 0 ] );
        return FAILURE;
        }
    int watchFd = setupOutputFile( outputFileName );
    if( watchFd < 0 )
        return FAILURE;
    int exitcode = 0;
    bool done = false;
    for( int attempt = 1;
         attempt <= attempts && !done;
         ++attempt )
        {
        KillStatus killed = SuccessfullExit;
        exitcode = runMake( argc - 4, argv + 4, &killed, timeout, watchFd );
        switch( killed )
            {
            case SuccessfullExit:
                // Make was not killed, so there is nothing to retry. A plain
                // break would only leave the switch and run make again.
                done = true;
                break;
            case KilledInterrupted:
                if( attempt == attempts )
                    fprintf( stderr, NAME "Error: Make command timed out, maximum number of attempts reached,"
                        " failing, exit code %d.\n", exitcode );
                else
                    fprintf( stderr, NAME "Error: Make command timed out, attempt %d/%d, interrupting"
                        " and retrying.\n", attempt, attempts );
                break;
            case KilledForced:
                fprintf( stderr, NAME "Error: Make command timed out, force killed, failing,"
                    " exit code %d\n", exitcode );
                done = true; // state after a forced kill is unclear, do not retry
                break;
            }
        }
    return exitcode;
    }

Attachment: windows.sh
Description: application/shellscript

_______________________________________________
LibreOffice mailing list
LibreOffice@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/libreoffice

Reply via email to