Hi,
I recently put something together. Hope it's still useful. It even has
some POD doc ... ;-)
Ekkehard
[snip]
#!perl -w
use English;
use Getopt::Std;
use strict;
#==========================================================================
=pod
=item @str strings($file, $maxbytes, $translate, $minchar, $offset)
This method is similar to the GNU version of the strings utility. It
reads the file $file upto the $maxbytes byte and extracts all strings
it encounters. A string is here a sequence of at least $minchar bytes
in the range 0x40 - 0x176 or any umlaut followed by a byte not out of
those character set. If $translate is set to 1 any umlauts and german
'sz' found in IBM encoding are translated to ISO-8859-1 encoding. If
the $offset parameter is not 0 each element in the returned list
contains the offset within the file before the string and - tab
separated - the string found. The offset is given in either decimal
(d), octal (o) or hexadecimal (x) notation.
=cut
#==========================================================================
sub strings
{
my ($file, $maxbytes, $translate, $minchar, $offset) = @ARG;
return unless -r $file;
return unless $translate =~ /^[01]$/;
return unless $minchar =~ /^\d+$/;
return unless $offset =~ /^[0dox]{1}$/;
$maxbytes = 3000 unless defined $maxbytes;
my $data;
open IN,"<$file" or return;
binmode IN;
read IN, $data, $maxbytes or return;
close IN or return;
# AE OE UE ae oe ue sz
my $IBM = '\216\231\232\204\224\201\341';
my $ANSI = '\304\326\334\344\366\374\337';
# What we consider as 'ascii':
my $ascii="\040-\176$IBM$ANSI";
my @strings;
while ($data =~ /([$ascii]{$minchar,})[^$ascii]/g)
{
if ($offset ne "0")
{
push @strings,
sprintf("%$offset",pos($data)-length($1)-1)."\t$1";
}
else
{
push @strings, $1;
}
}
# Translate IBM to ANSI encoding. We can not use the variables
# here as the translation table is generated at compile time (see
# tr documentation) !!!
if ($translate == 1)
{
foreach (@strings)
{ tr [\216\231\232\204\224\201\341]
[\304\326\334\344\366\374\337]; }
}
return @strings;
}
our $opt_b = 0;
our $opt_n = 4;
our $opt_t = 0;
our $opt_T = 0;
our $opt_h;
getopts('b:n:t:Th');
if ($opt_h || @ARGV==0)
{
print <<HEND;
Usage: pstrings [-n <minchars>] [-b <maxbytes>] [-t radix] [-T] [-h]
<filename>
-n at least minchars characters make up the string
-b at most maxbytes are read from the file if specified.
Otherwise the entire file is read.
-t Print the offset within the file before each string. The single
character argument specifies the radix of the offset: o for octal,
x for hexadecimal, or d for decimal.
-T translates IBM encoded umlauts to ISO encoded ones
-h shows this help
The filename has to be in a NTish form.
HEND
exit 0;
}
my $file = $ARGV[0];
$file =~ s|\\|/|g;
die "Canot read from $file: $!" unless -r $file;
$opt_b = -s _ if $opt_b == 0;
my @s = strings($file,$opt_b,$opt_T,$opt_n, $opt_t) or die "No strings:
$!";
binmode STDOUT;
print join("\n",@s);
exit 0;
_______________________________________________
ActivePerl mailing list
[EMAIL PROTECTED]
To unsubscribe: http://listserv.ActiveState.com/mailman/mysubs