In message <Pine.SGI.4.21.0201230722470.3473041-100000@the-gimp>, Dave writes:
>
>Hardeep Singh wrote:
>> Setting to ISO-LATIN-1 throws the UnsupportedEncodingException
>
>Maybe try "8859_1" in place of "ISO-LATIN-1"

I finally got around to running several tests today.  I wrote a test
program and searched various binary files with several different regular
expressions.  The default encoding worked, US-ASCII resulted in the
described index out of bounds exceptions, but ISO-8859-1 was fine.  US-ASCII
is a 7-bit character set, so there's probably some issue there.  But
ISO-8859-1 is 8-bit.  I had no problems running matches against jar files,
executables, MP3s, or JPGs.  So if you're still having problems, please
post some sample code and input where I or someone else can download it
and try it out. 

I've attached my test program.  Should I add it to the awk examples in CVS?

daniel

import java.io.*;
import org.apache.oro.text.regex.*;
import org.apache.oro.text.awk.*;

/**
 * Command-line example that prints every match of a regular expression
 * found in a file, in the spirit of the Unix <code>strings</code> command.
 * Matching is done with the awk package over a stream, so the input is
 * handed to the matcher as a Reader rather than as a String.
 *
 * Usage: <code>strings file [pattern] [encoding]</code>
 */
public final class strings {

  /**
   * Pairs a compiled awk pattern with a matcher and writes each match
   * found in a character stream to an output writer.
   */
  public static final class StringFinder {
    /**
     * Default string expression.  Looks for at least 4 contiguous
     * printable characters.  Differs slightly from GNU strings command
     * in that any printable character may start a string.
     */
    public static final String DEFAULT_PATTERN =
      "[\\x20-\\x7E]{3}[\\x20-\\x7E]+";

    Pattern pattern;
    AwkMatcher matcher;

    /**
     * Creates a finder for the given expression.  The expression is
     * compiled with the case insensitive mask.
     *
     * @param regex The awk regular expression to search for.
     * @exception MalformedPatternException If the expression cannot be
     *            compiled.
     */
    public StringFinder(String regex) throws MalformedPatternException {
      AwkCompiler compiler = new AwkCompiler();
      pattern = compiler.compile(regex, AwkCompiler.CASE_INSENSITIVE_MASK);
      matcher = new AwkMatcher();
    }

    /**
     * Creates a finder that searches for {@link #DEFAULT_PATTERN}.
     *
     * @exception MalformedPatternException If the default expression
     *            cannot be compiled.
     */
    public StringFinder() throws MalformedPatternException {
      this(DEFAULT_PATTERN);
    }

    /**
     * Prints every match found in the input, one per line, and then
     * flushes the output.  Neither the input nor the output is closed;
     * both remain the responsibility of the caller.
     *
     * @param input  The character stream to search.
     * @param output The writer that receives each match.
     * @exception IOException If reading from the input fails.
     */
    public void search(Reader input, PrintWriter output) throws IOException {
      MatchResult result;
      AwkStreamInput in = new AwkStreamInput(input);

      while(matcher.contains(in, pattern)) {
        result = matcher.getMatch();
        output.println(result);
      }
      output.flush();
    }
  }


  /**
   * Program entry point.  Searches the file named by the first argument
   * and prints the matches to standard output.  The optional second
   * argument overrides the pattern and the optional third argument
   * overrides the encoding used to decode the file, which defaults to
   * ISO-8859-1.  Any failure is reported as a stack trace on standard
   * error.
   *
   * @param args file [pattern] [encoding]
   */
  public static final void main(String args[]) {
    String regex = StringFinder.DEFAULT_PATTERN;
    String filename, encoding = "ISO-8859-1";
    StringFinder finder;
    InputStream raw = null;
    Reader file = null;

    if(args.length < 1) {
      System.err.println("usage: strings file [pattern] [encoding]");
      return;
    }

    filename = args[0];

    if(args.length > 1)
      regex = args[1];

    if(args.length > 2)
      encoding = args[2];

    try {
      finder = new StringFinder(regex);
      // Open the byte stream on its own so it can still be closed if the
      // encoding name is rejected when the reader is constructed.
      raw = new FileInputStream(filename);
      file = new InputStreamReader(raw, encoding);
      finder.search(file, new PrintWriter(new OutputStreamWriter(System.out)));
    } catch(Exception e) {
      e.printStackTrace();
    } finally {
      // Release the file handle on every path.  Closing the reader also
      // closes the stream it wraps, so only one of the two is closed.
      try {
        if(file != null)
          file.close();
        else if(raw != null)
          raw.close();
      } catch(IOException e) {
        e.printStackTrace();
      }
    }
  }
}

--
To unsubscribe, e-mail:   <mailto:[EMAIL PROTECTED]>
For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>

Reply via email to