Hi there!

This may not be the right forum for this question, but surely someone from
Red Hat will have something to say, and other users can share their
experiences on the subject.

Using gcj from Red Hat 7.1, I compiled a small Java app, a duplicate
file finder.

I was very surprised to see that it was 3x slower than the same app
running on IBM Java 1.3.0. Is the IBM jitc so good, is gcj so bad, or am
I missing something obvious?

Here's the program, so anyone can try it. It first checks the sizes of all
the files given on the command line, and then calculates a checksum of the
files that have the same size. I'm sorry the messages and variable names are
in Portuguese, but I hope the program will be easy to understand anyway.

[]s, Fernando Lozano


File ArquivosDuplicados2.java
-----------------------------------

import java.util.*;
import java.util.zip.*;
import java.io.*;


/**
 * Groups files under an arbitrary key (the callers in this file use a file
 * size or a size/checksum string) and keeps a running count of the files
 * currently stored.
 */
class PossiveisDuplicatas2
{
    // Maps each key to a Vector holding the File instances added under it.
    private Hashtable porChave = new Hashtable ();
    // Number of files currently held across all keys.
    private int qtdeArquivos = 0;

    /**
     * Stores a file under the given key, creating the key's group on first use.
     *
     * @param chave grouping key; must not be null (Hashtable rejects null keys)
     * @param arq   file to add to the group
     */
    public void add (Object chave, File arq)
    {
        Vector arquivos = (Vector)porChave.get (chave);
        if (arquivos == null) {
            arquivos = new Vector ();
            porChave.put (chave, arquivos);
        }
        arquivos.addElement (arq);
        qtdeArquivos++;
    }

    /**
     * @return an enumeration over the keys currently stored
     */
    public Enumeration chaves ()
    {
        return porChave.keys ();
    }

    /**
     * @param chave grouping key to look up
     * @return the files stored under the key, or null if the key is unknown
     */
    public Vector arquivos (Object chave)
    {
        return (Vector)porChave.get (chave);
    }

    /**
     * @return the number of distinct keys currently stored
     */
    public int quantasChaves ()
    {
        return porChave.size ();
    }

    /**
     * @return the number of files currently stored across all keys
     */
    public int quantosArquivos ()
    {
        return qtdeArquivos;
    }

    /**
     * Drops every group that holds fewer than two files, since a file alone in
     * its group cannot have a duplicate, and adjusts the file count to match.
     */
    public void eliminaUnicos ()
    {
        // Walk the entry set and remove through the iterator: removing from
        // the Hashtable directly while enumerating its keys is not a supported
        // way to modify the table during traversal.
        Iterator entradas = porChave.entrySet ().iterator ();
        while (entradas.hasNext ()) {
            Map.Entry entrada = (Map.Entry)entradas.next ();
            Vector arquivos = (Vector)entrada.getValue ();
            if (arquivos.size () < 2) {
                qtdeArquivos -= arquivos.size ();
                entradas.remove ();
            }
        }
    }
}

public class ArquivosDuplicados2
{
    static private PossiveisDuplicatas2 agrupaPorTamanho (String[]
arquivos)
    {
        PossiveisDuplicatas2 dups = new PossiveisDuplicatas2 ();
        for (int i = 0; i < arquivos.length; i++) {
            File arq = new File (arquivos[i]);
            dups.add (new Long (arq.length ()), arq);
        }
        return dups;
    }
    
    static private PossiveisDuplicatas2 agrupaPorChecksum
(PossiveisDuplicatas2 dups)
    {
        PossiveisDuplicatas2 novosDups = new PossiveisDuplicatas2 ();
        Enumeration enum1 = dups.chaves ();
        while (enum1.hasMoreElements ()) {
            Object chave = enum1.nextElement ();
            PossiveisDuplicatas2 mesmoTamanho = new PossiveisDuplicatas2 ();
            Enumeration enum2 = dups.arquivos (chave).elements ();
            while (enum2.hasMoreElements ()) {
                File arq = (File)enum2.nextElement ();
                try {
                    mesmoTamanho.add (new Long (calculaChecksum (arq)), arq);
                }
                catch (Exception e) {
                    // por enquanto, ignora erros de I/O
                    //System.err.println ("Erro lendo " + arq.getAbsolutePath ());
                    //System.err.println (e);
                    //System.err.println (e.getMessage ());
                }
            }
            mesmoTamanho.eliminaUnicos ();
            Enumeration enum3 = mesmoTamanho.chaves ();
            while (enum3.hasMoreElements ()) {
                Object novaChave = enum3.nextElement ();
                String chaveComposta = chave.toString () + "/" +
                    novaChave.toString ();
                Enumeration enum4 = mesmoTamanho.arquivos (novaChave).elements ();
                while (enum4.hasMoreElements ())
                    novosDups.add (chaveComposta, (File)enum4.nextElement ());
            }
        }
        return novosDups;
    }

    static private long calculaChecksum (File arq) throws IOException
    {
        //java.util.zip.CheckedInputStream
        //java.util.zip.CRC32
        //java.util.zip.Adler32
        Adler32 checksum = new Adler32 ();
        CheckedInputStream in = new CheckedInputStream (
            new BufferedInputStream (new FileInputStream (arq)),
            checksum);
        while (in.read () != -1) ;
        return checksum.getValue ();
    }
    
    static private void listaDuplicatas (PossiveisDuplicatas2 dups,
String agrupamento)
    {
        Enumeration it1 = dups.chaves ();
        while (it1.hasMoreElements ()) {
            Object chave = it1.nextElement ();
            System.out.println (agrupamento + ": " + chave);
            Enumeration enum2 = dups.arquivos (chave).elements ();
            while (enum2.hasMoreElements ()) {
                File arq = (File)enum2.nextElement ();
                System.out.println (arq.getAbsolutePath ());
            }
        }
    }

    private static void listaEstatisticas (int total,
PossiveisDuplicatas2 dups)
    {
        System.out.println ("Analizados " + total + " arquivos.");
        System.out.println ("Encontradas " + dups.quantosArquivos () + "
possíveis duplicatas.");
        System.out.println ("em " + dups.quantasChaves () + " grupos.");
    }

    public static void main (String[] args)
    {
        System.out.println ();
        System.out.println ("Passo1: Comparando tamanhos");
        System.out.print ("Varrendo arquivos... ");
        PossiveisDuplicatas2 dups = agrupaPorTamanho (args);
        int total = dups.quantosArquivos ();
        dups.eliminaUnicos ();
        listaEstatisticas (total, dups);
        //listaDuplicatas (dups, "Tamanho (Bytes)");
        System.out.println ();
        System.out.println ("Passo2: Comparando checksums");
        System.out.print ("Varrendo arquivos... ");
        total = dups.quantosArquivos ();
        PossiveisDuplicatas2 dups2 = agrupaPorChecksum (dups);
        listaEstatisticas (total, dups2);
        listaDuplicatas (dups2, "Tamanho (Bytes)/Checksum");
    }
}

End of file ArquivosDuplicados2.java
-------------------------------------




_______________________________________________
Redhat-list mailing list
[EMAIL PROTECTED]
https://listman.redhat.com/mailman/listinfo/redhat-list

Reply via email to