On 06/06/2011 06:53 PM, Christian Siefkes wrote:
> I've just seen that somebody already did the same, but I too tried to
> optimize your wordcount implementation a bit and got to the following Vala
> program:

Meanwhile I fixed the program so that it will actually show the correct
output, i.e. the same line, word, and byte counts that wc shows. (The byte
count will still be off if the file uses DOS-style instead of Unix-style
line endings, but it's a start.) The need to call char.isspace() (checking
for spaces and tabs) instead of just checking whether the char is a space
slows things down a bit:

$ time /usr/bin/wc ../shakespeare.txt
 124456  901325 5582655 ../shakespeare.txt

real    0m0.253s
user    0m0.248s
sys     0m0.000s

$ time wc2 ../shakespeare.txt
 124456  901325 5582655 ../shakespeare.txt

real    0m0.337s
user    0m0.320s
sys     0m0.008s

That's about 30% slower than the C version--still quite acceptable, I feel.
The modified program is appended below.

Best regards
        Christian


namespace WordCount {

    /**
     * Counts the words in //str//.
     *
     * A word is a maximal run of characters for which char.isspace() is
     * false, so leading, trailing and repeated whitespace never produce
     * empty words.
     *
     * @param str the text to scan
     * @return the number of words; 0 for an empty or all-whitespace string
     */
    private int count_words(string str) {
        // string.length is a strlen() call; take it once instead of
        // re-evaluating it in the loop condition.
        int len = str.length;
        int result = 0;
        bool in_word = false;

        // Count the first character of every word. Scanning str directly
        // yields the same total as stripping it first, without the copy
        // that strip() has to allocate for each call.
        for (int i = 0; i < len; i++) {
            if (str[i].isspace()) {
                in_word = false;
            } else if (!in_word) {
                // whitespace (or start of string) -> word transition
                in_word = true;
                result++;
            }
        }
        return result;
    }

    /**
     * Prints the line, word and byte count of every file named on the
     * command line, one row per file (counts first, then the file name).
     *
     * A file that cannot be processed is reported on stderr; the remaining
     * files are still handled.
     *
     * @param args program name followed by the paths of the files to count
     * @return 0 if every file was processed, 1 if at least one failed
     */
    public int main(string[] args) {
        int status = 0;

        for (int i = 1; i < args.length; i++) {
            File file = File.new_for_path(args[i]);

            try {
                FileInputStream fis = file.read();
                DataInputStream dis = new DataInputStream(fis);
                // Go through the Seekable interface to query the position
                // of the underlying file stream after reading.
                Seekable position = fis;

                // 64-bit counters: a plain int wraps on inputs of 2 GiB or more.
                int64 lc = 0;
                int64 wc = 0;
                int64 bc = 0;
                string? line;
                size_t line_len;

                while ((line = dis.read_line(out line_len, null)) != null) {
                    lc++;
                    // read_line() returns the line without its newline, so add
                    // one byte back. line_len is used rather than line.length
                    // because the latter stops at an embedded NUL byte.
                    bc += (int64) line_len + 1;
                    wc += count_words(line);
                }

                // The loop assumed that every line ended in a newline. If the
                // stream can report its position and it is exactly one byte
                // short of that estimate, the last line had no newline: take
                // back the extra byte, and the extra line too, since wc counts
                // newline characters. (Relies on the stream's default LF-only
                // newline handling; non-seekable inputs keep the estimate.)
                if (lc > 0 && position.can_seek() && position.tell() == bc - 1) {
                    lc--;
                    bc--;
                }

                // print line + word + byte count + filename (like wc)
                stdout.printf("%7" + int64.FORMAT + " %7" + int64.FORMAT +
                    " %7" + int64.FORMAT + " %s\n", lc, wc, bc, args[i]);
            } catch (Error e) {
                stderr.printf("Error processing file '%s': %s\n",
                    file.get_path(), e.message );
                // remember the failure so the exit status reflects it
                status = 1;
            }

        }

        return status;
    }

}


-- 
|------- Dr. Christian Siefkes ------- christ...@siefkes.net -------
| Homepage: http://www.siefkes.net/ | Blog: http://www.keimform.de/
|    Peer Production Everywhere:       http://peerconomy.org/wiki/
|---------------------------------- OpenPGP Key ID: 0x346452D8 --
Politics is for people who have a passion for changing life but lack a
passion for living it.
        -- Tom Robbins, Even Cowgirls Get the Blues

Attachment: signature.asc
Description: OpenPGP digital signature

_______________________________________________
vala-list mailing list
vala-list@gnome.org
http://mail.gnome.org/mailman/listinfo/vala-list

Reply via email to