I've included a Perl script below that inserts and updates copyrights
in a predefined set of text-based files.  This is the script that was
used to update the copyright headers in the sources that were uploaded
to the River SVN repository.

The script was originally written by a former member of the original
Jini team at Sun.  I've merely fixed a couple of bugs and modified it to
work without knowledge of an underlying source control system.  The
comments at the top of the script should provide enough information
regarding usage.

Let me know if you have any questions or comments.



Frank



-------------  Begin Included File  -------------


#
# Add or update the copyright in a given file.  The copyright is
# assumed to be in a comment that contains a particular copyright
# string ($copyrightString) somewhere inside it, and that the comment
# contains nothing but the copyright.  The entire comment including
# that line will be removed, and a new comment consisting solely of
# the new copyright will be inserted.
#
# The copyright is read from "copyright.txt", either in the local
# directory where copyright is invoked or from the directory where the
# script itself lives (should be an option someday).  This is the text
# that will go *inside* the comment.  It does not contain any
# comment-related characters, so that the comment could be added to
# files with different comment styles.
#
# The copyright in "copyright.txt" can contain an optional copyright
# version line that assigns a version string to that specific copyright
# (eg, CopyrightVersion v1.0 ).  When the text changes, this version
# string should change.  If the script encounters a file whose
# copyright has the same version as the current one, the file will
# not be touched.
#
# Comments are understood on a per-file-type basis.
#
# Without arguments, this script will process all files within the
# current directory and its sub-directories.  Alternatively, one or
# more files and/or directories can be specified as arguments.
#
#   Usage: copyright [item ...]
#
#          item: One or more files and/or directories to be processed
#


#
# The following lines are designed to run perl on this script, no
# matter which shell the script is accidentally run under.  The point
# here is this: The normal way to invoke perl on a script is to put
# a line starting with #! at the beginning of the script that invokes
# perl, as in "#!/usr/bin/perl".  The problem with this is that the
# script then hard codes the location of perl into itself, and when
# that changes the script breaks (and rather mysteriously at that).
# So this script purposefully specifies *no* interpreter at the top,
# and then adapts to whatever shell is running to get to perl.  (I
# tried putting #!/bin/sh at the top to guarantee the shell, but perl
# is too clever by half -- when the shell script executes perl, perl
# looks at the #! line and says "this is a shell script" and invokes
# the shell.)
#
# Yes this is ugly, but it is an Officially Certified Hack(TM), so
# no purist, anti-hacker, ivory tower kvetching.
#

# To perl, the three lines below are ONE statement guarded by the
# trailing "if 0" modifier, so perl parses it but never runs it.  A
# shell reads the same text line by line and execs perl on this file,
# passing the original arguments along.
# NOTE(review): presumably Bourne-style shells take the ${1+"$@"} form
# and csh-style shells take the $argv:q form -- see perlrun to confirm.
eval '(exit $?0)' && eval 'exec perl -S $0 ${1+"$@"}'
    & eval 'exec perl -S $0 $argv:q'
            if 0;

# Directory holding this script: $0 with its last path component
# removed.  When $0 has no directory part at all (e.g. the script was
# started as "perl copyright"), the substitution does not match; in
# that case fall back to '.' instead of leaving the script's own name
# in $scriptDir, which would produce a bogus @INC entry and a bogus
# "$0/copyright.txt" fallback path.
$scriptDir = $0;
$scriptDir = '.' unless ($scriptDir =~ s,/[^/]+$,,);
push(@INC, $scriptDir);

$Verbose = 1;                           # someday make this turn-off-able
$CopyrightFile = "$scriptDir/copyright.txt"; # make this an option someday
@args = ($#ARGV < 0 ? '.' : @ARGV);     # by default, use all source

# A comment containing this string is taken to be the copyright comment.
$copyrightString = "Licensed to the Apache Software Foundation";

#
# Comment syntax for each supported file type, written as a single
# ':'-separated string:
#
#       pre:mid:end[:prePat:endPat]
#
# pre, mid and end are the literal strings that open the comment,
# prefix each line inside it, and close it.  The optional fourth and
# fifth fields are regular expressions used to recognize the start and
# the end of an existing comment; when they are absent, a plain
# substring match on pre and end is done instead.
#
$java  = '/*: *: */';
$shell = '#/*:#:#*/';
$bat   = '@rem /*:@rem:@rem */';

# Technically all HTML needs is "<!--" and "-->", but the !s are used to
# draw a cutesy box.
$html  = '<!--: !: !-->:\<\!(\n|\s|\-\-):.*(\-\-\s*)?\>';

#
# File suffix (leading '.') or file name prefix => comment syntax.
#
$mapping{'.bat'} = $bat;

@mapping{'.c', '.h', '.java', '.policy'} = ($java) x 4;

@mapping{'.html', '.jsp', '.jspfragment', '.xml'} = ($html) x 4;

@mapping{'.nmk', '.jmk', '.sh', '.csh', '.ksh', '.pl', '.properties',
         'makefile', 'Makefile', 'GNUmakefile'} = ($shell) x 10;

&createGetPats();                       # defines getPats() and $names

#
# Process the arguments.  A text file is taken as is; a directory is
# searched recursively (with find(1)) for the known file types listed
# in $names; anything else is reported and ignored.
#
foreach $arg (@args) {
    if (-T $arg) {                      # text file
        push(@files, $arg);
    } elsif (-d $arg) {                 # directory -- run find
        # The directory name goes through the shell, so single-quote it
        # (escaping embedded quotes) and keep a leading '-' from being
        # read by find as an option.
        my $dir = ($arg =~ /^-/ ? "./$arg" : $arg);
        $dir =~ s/'/'\\''/g;
        my $cmd = "find '$dir' \\( $names \\) -print";
        my $found = `$cmd`;
        print STDERR "find reported problems under $arg\n" if ($? != 0);

        # find prints one path per line; splitting on newlines (rather
        # than on any whitespace) keeps paths containing blanks intact.
        push(@files, grep { $_ ne '' } split(/\n/, $found));
    } else {
        print STDERR "unknown file type for $arg: ignoring\n";
    }
}

&getCurrentCopyright();                 # read the current copyright

$error = 0;                             # process each file
foreach $file (@files) {
    $error |= &process_file($file);
}

exit($error);

#
# process_file:
#       Process a single file.  The comment syntax is chosen from the
#       file's name via getPats().  If the file already has a comment
#       containing $copyrightString, that whole comment is replaced by
#       the current copyright (@Copyright); otherwise the copyright is
#       inserted at the top of the file, after an initial "#!"/"%!"
#       line or XML declaration if there is one.
#
#       Symbolic links and unknown file types are skipped, as are files
#       whose copyright comment carries the same CopyrightVersion as
#       the current copyright.
#
#       The new contents are written to "$file.cr", which is then
#       renamed over $file (keeping the original permission bits).
#
#       Returns 0 on success or skip, 1 if the file could not be
#       rewritten.  Dies if $file or the temporary file cannot be
#       opened.
#
sub process_file
{
    my ($file) = @_;

    if (-l $file) {
        print "link: $file: skipping\n" if ($Verbose);
        return 0;
    }

    my ($pre, $mid, $end, $prePat, $endPat) = &getPats($file);

    if ($pre eq 'UNDEF') {
        print "unknown file type: $file: skipping\n" if ($Verbose);
        return 0;
    }

    # Without explicit regular expressions, match pre/end literally.
    $prePat = &pat($pre) unless (defined($prePat) && $prePat ne '');
    $endPat = &pat($end) unless (defined($endPat) && $endPat ne '');

    my $found_copyright = 0;            # did we find copyright comment?
    my $in_comment      = 0;            # are we inside a comment block?
    my $comment_start   = 0;            # current comment starting position
    my $comment_version;                # CopyrightVersion seen in current comment
    my $insert_at       = 0;            # where a brand new copyright goes
    my ($copyright_start, $copyright_end); # copyright comment start/end positions
    my $copyright_version;              # copyright version in file

    open(my $in, '<', $file) || die("$file: $!");

    while (my $line = <$in>) {          # search for existing copyright

        # An initial #! or %! line, or an XML declaration, has to stay
        # the very first thing in the file: insert after it.
        if ($. == 1 && $line =~ /^(?:[#%]!|<\?xml\b)/) {
            $insert_at = tell($in);
            next;
        }

        ##
        ## If the prePat is closed by the endPat on the same line, a
        ## copyright is probably not there:
        ##
        if ($line =~ /$prePat.*$endPat/) {
            next;
        }
        elsif ($line =~ /$prePat/) {
            $in_comment      = 1;
            $comment_start   = tell($in) - length($line);
            $comment_version = undef;
        }
        elsif ($in_comment && $line =~ /\Q$copyrightString\E/) {
            # Only the first comment holding the copyright string is the
            # copyright comment; later mentions must not move the start.
            if (!$found_copyright) {
                $found_copyright = 1;
                $copyright_start = $comment_start;
            }
        }
        elsif ($line =~ /CopyrightVersion ([^\s]+)/) {
            $comment_version = $1;
        }
        elsif ($line =~ /$endPat/) {
            # The first comment end after the copyright string closes
            # the copyright comment, wherever in the file it started.
            if ($found_copyright && !defined($copyright_end)) {
                $copyright_end     = tell($in);
                $copyright_version = $comment_version;
            }
            $in_comment = 0;
        }
    }

    # No copyright comment, or one that never ended: add a new one at
    # the insertion point instead of replacing anything.
    if (!$found_copyright || !defined($copyright_end)) {
        $found_copyright = 0;
        $copyright_start = $copyright_end = $insert_at;
    }

    # A file already carrying the current copyright version is left
    # untouched.
    if ($found_copyright && defined($copyright_version)
        && defined($CurrentCopyrightVersion)
        && $copyright_version eq $CurrentCopyrightVersion)
    {
        print "$file: copyright $copyright_version is current: skipping\n"
            if ($Verbose);
        close($in);
        return 0;
    }

    if ($Verbose) {
        print "$file: ";
        if (!$found_copyright) {
            print "adding";
        } else {
            print "replacing copyright ";
            print ($copyright_version ? $copyright_version : "(unversioned)");
        }
        print "\n";
    }

    my @st  = stat($in);                # for the permission bits
    my $tmp = "$file.cr";               # create tmp file with new stuff
    open(my $out, '>', $tmp) || die("$tmp: $!");

    my $buf = '';
    seek($in, 0, 0);
    read($in, $buf, $copyright_start) if ($copyright_start > 0);
    print {$out} $buf;                  # copy pre-comment stuff
    print {$out} "$pre\n";
    foreach my $c (@Copyright) {        # put in copyright
        print {$out} $mid, ' ', $c;
    }
    print {$out} "$end\n";
    seek($in, $copyright_end, 0);       # copy post-comment stuff
    while (read($in, $buf, 4096)) {
        print {$out} $buf;
    }
    close($in);

    # Buffered write errors (e.g. a full disk) only show up at close.
    if (!close($out)) {
        print STDERR "couldn't write $tmp: $!\n";
        unlink($tmp);
        return 1;
    }

    # The rename replaces the original file, so carry over its
    # permission bits (scripts must stay executable).
    chmod($st[2] & 07777, $tmp) if (@st);

    if (!rename($tmp, $file)) {         # rename tmp to cur
        print STDERR "couldn't rename files for $file: $!\n";
        unlink($tmp);
        return 1;
    }
    return 0;
}

#
# getCurrentCopyright
#       Read the current copyright text into @Copyright, one element
#       per line.  "copyright.txt" in the current directory is tried
#       first, then $CopyrightFile.  If a line of the form
#       "CopyrightVersion <version>" is present, the first such version
#       is stored in $CurrentCopyrightVersion (the line is optional).
#
#       Dies if neither file can be opened or if the copyright is empty.
#
sub getCurrentCopyright
{
    my $fh;
    open($fh, '<', "copyright.txt") || open($fh, '<', $CopyrightFile)
        || die("$CopyrightFile: $!");
    @Copyright = <$fh>;
    close($fh);

    die("copyright text is empty\n") unless (@Copyright);

    # Each line is later written out behind a comment prefix, so the
    # last line needs a newline too or the comment terminator would end
    # up on the same line as the text.
    $Copyright[-1] .= "\n" unless ($Copyright[-1] =~ /\n\z/);

    $CurrentCopyrightVersion = undef;
    foreach my $line (@Copyright) {
        if ($line =~ /CopyrightVersion ([^\s]+)/) {
            $CurrentCopyrightVersion = $1;
            last;
        }
    }

    print "Copyright version $CurrentCopyrightVersion\n"
        if ($Verbose && $CurrentCopyrightVersion);
}

#
# createGetPats
#       Build, from %mapping, the table that getPats() uses to map a
#       file name to its pre, mid, and end comment strings (plus the
#       optional pre/end regular expressions).  Also define $names,
#       the "-name ... -o -name ..." expression given to find(1) to
#       locate files of the known types.
#
#       A %mapping key starting with '.' is a file suffix; any other
#       key is a file name prefix (e.g. 'Makefile').
#
#       Dies if a %mapping value is not of the form "pre:mid:end...".
#
sub createGetPats {
    @getPatsTable = ();                 # start afresh on every call
    @findArgs     = ();

    # Fixed order (suffixes first, then names) so that the result does
    # not depend on hash ordering when a file name matches two types.
    my @suffixTypes = sort(grep { /^\./ } keys(%mapping));
    my @nameTypes   = sort(grep { !/^\./ } keys(%mapping));

    foreach my $type (@suffixTypes, @nameTypes) {
        my ($pre, $mid, $end) = ($mapping{$type} =~ /^([^:]*)\:([^:]*)\:(.*)$/);
        die("bad comment mapping for $type\n") unless (defined($end));

        # Optional args: regular expressions for comment start and end
        my ($prePat, $endPat);
        if ($end =~ /^([^:]*)\:([^:]*)\:(.*)$/) {
            ($end, $prePat, $endPat) = ($1, $2, $3);
        }

        my $matcher;
        if ($type =~ /^\./) {           # suffix: match the end of the name
            push(@findArgs, "-name '*$type'");
            $matcher = qr/\Q$type\E$/;
        } else {                        # name: match the start of a component
            push(@findArgs, "-name '$type*'");
            $matcher = qr{(?:^|/)\Q$type\E};
        }
        push(@getPatsTable, [$matcher, $pre, $mid, $end, $prePat, $endPat]);
    }

    $names = join(' -o ', @findArgs);
}

#
# getPats
#       Return (pre, mid, end, prePat, endPat) for the given file name,
#       using the table built by createGetPats.  prePat and endPat are
#       undef when the file type has no explicit regular expressions.
#       For an unknown file type, pre, mid, and end are all 'UNDEF'.
#
sub getPats {
    my ($name) = @_;

    foreach my $entry (@getPatsTable) {
        my ($matcher, @strings) = @$entry;
        return @strings if ($name =~ $matcher);
    }
    return ('UNDEF', 'UNDEF', 'UNDEF', undef, undef);
}

#
# pat
#       Make a plain string safe to interpolate into a regular
#       expression: every non-word character in it is preceded by a
#       backslash, so that it only ever matches itself.
#
sub pat {
    my ($text) = @_;

    return $text unless (defined($text));

    my @escaped;
    foreach my $char (split(//, $text)) {
        push(@escaped, ($char =~ /\W/ ? "\\$char" : $char));
    }
    return join('', @escaped);
}


-------------  End Included File  -------------



Reply via email to