[Mesa-dev] salvage files

Gerard Saraber Wed, 18 Aug 1999 19:55:41 -0700

Hi everyone :-)
I was thinking it would be nice if we could automate the recovery/rename
process.
So I wrote a little program that does - as best I could come up with -
just that.
The basic algorithm is easy: it diffs every recovered file (named like
this: #1650724) against every file in your Mesa source tree, and it
recommends the file with the least amount of diff output, ideally no
output at all if the files are the same.

It creates log.txt with the results and rename.sh, which is basically a
shell script that renames all the found files, but only the ones that
match 100%. For the ones that have diff output it puts in commented-out
lines instead.

It requires two files. The first is infiles.txt, a list of the salvaged
files; you can make it like this: find salvage/ > infiles.txt
Don't worry about directories in the list, the program checks for them.
The other file it requires is cmpfiles.txt, which should contain a list
of the files in your Mesa source tree; make it like this:
find MesaCVS/ > cmpfiles.txt

I know this approach is kind of dumb, also known as "brute force", and I'm
open to more intelligent approaches.
The program has been comparing my MesaCVS.tar.gz (from Aug. 4) against
the salvage files on ftp.mesa3d.org/mesa/SALVAGE/, and so far it hasn't
found a single match. Maybe I'm not comparing against the right tree (or,
oh my, there is a bug). You guys might have more luck. :)

I've attached it for your enjoyment.
In case the list server strips attachments, or if you have other problems,
just download it at: ftp://saraber.dhs.org/pub/mesa/resque.c

Regards,
Gerard Saraber
[EMAIL PROTECTED]
[EMAIL PROTECTED]

/*****************************************************************************
 *
 * This program was written to help the recovery of the Mesa CVS repository
 *  it requires (!) 2 files, infiles.txt and cmpfiles.txt
 *  infiles.txt is a list of the numbered inode files that need to be recovered
 *       fill it like this $ find salvage/ > infiles.txt
 *  cmpfiles.txt is a list of files in the Mesa source tree to compare the files
 *       to, it should be the closest snapshot you have.
 *       fill this file like this: $ find MesaCVS/ > cmpfiles.txt
 *  This program goes through infiles.txt and diffs each file against all
 *   files listed in cmpfiles.txt and puts what it thinks is the closest match
 *   in "log.txt" and "rename.sh". You'll see a bunch of files reported with
 *   1000001 lines difference: that means at least one of the two files is
 *   a binary file and they differ :-)
 *  the generated "rename.sh" is actually a shell script that automatically
 *   renames the numbered inode files to their correct equivalent, but only
 *   if this program is 100% sure, so no output from diff.. otherwise
 *   it will put a line in that is commented out, like this:
 *   #mv salvage/#1654881 MesaCVS/src/Windows/wgl.c # 1206 lines differ
 *   You should edit this file and look for files that you think might match
 *    like with 1 or 2 lines different, just uncomment the line.
 *
 ****************************************************************************** 
 *
 * Date    : 18/08/1999
 * License : GNU GPL, see http://www.gnu.org/ for details
 * Author  : Gerard A. M. Saraber
 *           [EMAIL PROTECTED]
 * Compile : gcc resque.c -o resque
 * Run     : ./resque
 * Let me know of any bugs, comments or whatever..
 *
 ******************************************************************************
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

/* One path name: at most 349 characters plus the terminating NUL. */
typedef char Filename[350];

/*
 * Home-grown boolean type (this file does not use <stdbool.h>).
 * true has to be nonzero and false zero so that the constants agree with
 * C's own notion of truth: with the values swapped, `if( flag )` and
 * `!flag` would silently do the opposite of what they say.
 */
typedef int bool;
#define true 1
#define false 0

/*
 * Report whether fname names an existing regular file.
 *
 * fname - path to examine
 *
 * Returns 1 when stat() succeeds and describes a regular file.  Returns 0
 * for directories and other file types, and for any path stat() cannot
 * examine (missing file, empty name, no permission, NULL pointer).
 * stat() follows symbolic links, so a link to a regular file counts.
 */
int isfile( const char *fname )
{
  struct stat statbuf;

  // When stat() fails it leaves statbuf unset; reading st_mode anyway
  // would return whatever happened to be on the stack.
  if( fname==NULL || stat( fname,&statbuf )!=0 )
    return 0;

  // S_ISREG compares the complete file-type field.  Masking with S_IFREG
  // alone also accepts other types whose code shares that bit (sockets,
  // on common platforms).
  return S_ISREG( statbuf.st_mode ) ? 1 : 0;
}
  
long count_lines_in_file( FILE *f,bool seek )
{
  long count=0;
  char line[350];
  do { // count number of lines
    fgets( line,349,f );
    line[ strlen(line)-1 ] =0;
    if( isfile( line ) )
      count++;
  } while( !feof( f ) );
  if( seek==true ) {
    fseek( f,0,SEEK_SET ); // back to beginning of file
  };
  return count;
};

int main()
{
  Filename *pInFilenames,*pCmpFilenames;
  FILE *f,*log,*script,*diffout;
  long n_in_files,n_cmp_files;
  char line[350];
  long l;
  char tmpCommand[900];
  long diffoutput_lines=0;
  long minimumdiffsofar; 
  long bestmatchingfile;
  long infile,cmpfile;


  if( (f=fopen("infiles.txt","rt"))==NULL ) {
    printf("error opening infiles.txt\n");
    exit(1);
  };
  n_in_files=count_lines_in_file( f,true );
  pInFilenames = (Filename*)malloc( sizeof(Filename)*n_in_files );
  for( l=0;l<n_in_files-1;l++ ) { // really read the files
    memset( line,0,349 );
    fgets( line,349,f );
    line[strlen(line)-1] =0;
    if( isfile( line ) ) {
      strncpy( pInFilenames[l], line,strlen(line) );
    } else {
      l--;
    }; 
  };
  fclose( f );  

  if( (f=fopen("cmpfiles.txt","rt"))==NULL ) {
    printf("error opening cmpfiles.txt\n");
    exit(1);
  };
  n_cmp_files = count_lines_in_file( f,true );
  pCmpFilenames = (Filename*)malloc( sizeof(Filename)*n_cmp_files );
  for( l=0;l<n_cmp_files;l++ ) { // really read the files
    memset( line,0,349 );
    fgets( line,349,f );
    line[strlen(line)-1] =0;
    if( isfile( line ) ) {
      strncpy( pCmpFilenames[l], line,strlen(line) );
    } else {
      l--;
    };
  };
  fclose( f );
  
  // we now have two lists in memory,
  //  one with the salvaged files, one with the files to compare to
  
//  printf("read bier: [%s]\n",pInFilenames[23] );

  log=fopen("log.txt","wt");
  script=fopen("rename.sh","wt");
  fprintf( script,"#!/bin/sh\n");

//printf("dumping cmpfiles:\n");
//for( l=0;l<n_cmp_files;l++ ) {
//  printf("%d: %s\n",l,pCmpFilenames[l] );
//};
    
  printf("here we go:\n");
  for( infile=0;infile<n_in_files-1;infile++ ) {
    minimumdiffsofar=99999999;
    bestmatchingfile=-1;
    printf("matching : %s [",pInFilenames[infile] );
    fflush( stdout );
    for( cmpfile=0;cmpfile<n_cmp_files-1;cmpfile++ ) {
      sprintf( tmpCommand,"diff %s %s",pInFilenames[infile],pCmpFilenames[cmpfile] );
//      printf("executing: %s\n",tmpCommand );
//      usleep( 250 );
  
      diffout = popen( tmpCommand,"r" );
      if( diffout==NULL ) {
        printf("error executing diff command: %s\n",tmpCommand );
        fclose( log );
        fclose( script );
        exit(1);
      };
      diffoutput_lines=0;
      do {
        memset( line,0,349 );
        fgets( line,349,diffout );
        diffoutput_lines++;
        if( strncmp( line,"Binary files",12 ) == 0 ) {
          diffoutput_lines += 999999; // trick if below into thinking files are VERY 
different
        };
      } while( !feof( diffout ) );
      pclose( diffout );
      if( diffoutput_lines < minimumdiffsofar ) {
        minimumdiffsofar = diffoutput_lines;
        bestmatchingfile = cmpfile;
      };
      if( cmpfile%100 == 0 ) { printf("."); fflush( stdout ); }; // progress indicator
    };
    if( minimumdiffsofar == 0 ) { // diff no output => exact match
      printf("] - Match!!!! filename: %s\n",pCmpFilenames[bestmatchingfile] );
      fprintf( log,"we have a match: %s == 
%s\n",pInFilenames[infile],pCmpFilenames[bestmatchingfile] );
      fprintf( script,"mv %s 
%s\n",pInFilenames[infile],pCmpFilenames[bestmatchingfile] );
    } else {
      printf("] - Probable match filename: %s, %d lines 
differ.\n",pCmpFilenames[bestmatchingfile],minimumdiffsofar );
      fprintf( log,"  partial match: %s ~ 
%s\n",pInFilenames[infile],pCmpFilenames[bestmatchingfile] );
      fprintf( script,"#mv %s %s # %d lines 
differ\n",pInFilenames[infile],pCmpFilenames[bestmatchingfile],minimumdiffsofar );
    };
    fflush( NULL ); // flush all streams
    
  }; // end for all infiles
  fclose( log );  
  fclose( script );
  printf("all done\n");
  return 0;
};

[Mesa-dev] salvage files

Reply via email to