John Nagle <[EMAIL PROTECTED]> wrote: > [EMAIL PROTECTED] wrote: > > On Apr 27, 9:07 am, John Nagle <[EMAIL PROTECTED]> wrote: > > > >>The CPython implementation is unreasonably slow compared > >>to good implementations of other dynamic languages such > >>as LISP and JavaScript. > > > > > > Why do you say CPython is slower than JavaScript? Please provide > > examples. > > See > > http://www.mozilla.org/projects/tamarin/faq.html > > Tamarin is a just-in-time compiler for Javascript.
...and is not yet released, as far as I can tell; this makes it kind of difficult to verify any kinds of claims about its speed. So, I tried to check the current speed of actual, existing, released open-source stand-alone Javascript engines -- starting with Mozilla's own "spidermonkey" -- versus the CPython implementation which you claim is "unreasonably slow". I set myself a simple benchmark which I thought would be impartial (AKA "equally unfair to both sides":-) because it's a pretty weird task, for which neither engine has definitely been optimized: find out what pair (number-of-vowels, number-of-consonants) happens most often among the words in /usr/share/dict/words (the one I have on my Macbook Pro: the wordlist from Webster's Second International, 1934 edition). I ran into a few snags tied to me not being a particularly good Javascript programmer, and to limitations of the spidermonkey js that I could easily install (via MacPorts): no file I/O support (except for stdin/stdout), so the input has to come from stdin -- etc, etc; I tried making things fair by placing the same limitations on each implementation (also forcing Python to use stdin, etc). 
I'm sure my Javascript code can be made much better, but here is what I have so far, as a.js: var vowels_re = /[aeiou]/gi; var conson_re = /[bcdfghjklmnpqrstvwxyz]/gi; var v_w_count = new Object; while (1) { var x = readline(); if (x=='' || x==null) { break; } var vows = x.match(vowels_re); var numvows = (vows==null)?0:vows.length; var cons = x.match(conson_re); var numcons = (cons==null)?0:cons.length; var key = 100*numvows + numcons; v_w_count[key] = 1 + (v_w_count[key] || 0); } var topcombo = 0; var maxcount = 0; for (key in v_w_count) { var newcount = v_w_count[key]; if (newcount > maxcount) { maxcount = newcount; topcombo = key; } } var nc = topcombo % 100; var nv = (topcombo-nc) / 100; print("top combination: "+nv+" vowels, "+nc+" consonants (occurs "+maxcount+" times)."); and the result and timing: $ time js -f a.js </usr/share/dict/words top combination: 3 vowels, 5 consonants (occurs 14947 times). real 0m18.664s user 0m12.717s sys 0m0.370s Here's the Python equivalent, a.py -- with the same constraints about having to use REs for the vowel/consonant counting, stdin-only, etc: $ time python a.py </usr/share/dict/words top combination: 5 vowels, 3 consonants (occurs 14947 times). real 0m3.700s user 0m2.814s sys 0m0.869s I'm sure that Tamarin, _once it's released_, will vastly enhance Javascript's current mediocre showing. But, for now, it appears to border on the dishonest to claim that CPython is unreasonably slow compared to _existing_ implementations of Javascript (and it would obviously be _totally_ dishonest to compare existing implementations with "paper tiger" ones that are _not_ yet existing and released). 
The fact that this is not a task for which Python is especially optimized, btw, can be confirmed by comparing Python's timing with an equivalent C-coded program with the same restrictions (using pcre): #include <pcre.h> #include <string.h> #include <stdio.h> int re_count(char* st, pcre* re) { int ovec[3]; int count = 0; int len = strlen(st); int start = 0; int rc; while( (rc=pcre_exec(re, NULL, st, len, start, 0, ovec, 3)) >= 0) { count++; start = ovec[1]; } if (rc != PCRE_ERROR_NOMATCH) { printf("rc was %d\n", rc); } return count; } int store_counts[100*100]; void add_count(int nc, int nv) { store_counts[nc + 100*nv]++; } int max_count(int* nc, int* nv) { int bestloc = 0; int topcoun = 0; int i; for(i=0; i<100*100; i++) { if (store_counts[i] > topcoun) { bestloc = i; topcoun = store_counts[i]; } } *nc = bestloc % 100; *nv = bestloc / 100; return topcoun; } int main() { const char* errms; int erof; pcre* vowels_re = pcre_compile("[aeiou]", PCRE_CASELESS, &errms, &erof, NULL); if (!vowels_re) { printf("(%s) at (%d) on vowels\n", errms, erof); return 1; } pcre* conson_re = pcre_compile("[bcdfghjklmnpqrstvwxyz]", PCRE_CASELESS, &errms, &erof, NULL); if (!conson_re) { printf("(%s) at (%d) on conson\n", errms, erof); return 1; } char buffer[1000]; while (gets(buffer)) { int nv = re_count(buffer, vowels_re); int nc = re_count(buffer, conson_re); add_count(nv, nc); } int pnv, pnc, maxc; maxc = max_count(&pnv, &pnc); printf("top combination: %d vowels, %d consonants (occurs %d times).\n", pnv, pnc, maxc); return 0; } $ time ./a.out </usr/share/dict/words warning: this program uses gets(), which is unsafe. top combination: 3 vowels, 5 consonants (occurs 14947 times). real 0m0.673s user 0m0.635s sys 0m0.012s We do observe the typical ratio in runtimes -- about 5.35 in this case (around one order of magnitude is typical). 
Of course, it's easy to enhance these performances by doing away with regular expressions -- e.g., in Python, b.py: import string identity = string.maketrans('', '') def make_counter(lower_string): total_string = lower_string + lower_string.upper() def counter(s, ident=identity, thest=total_string): return len(s) - len(s.translate(ident, thest)) return counter def main(afile): count_vowels = make_counter('aeiou') count_conson = make_counter('bcdfghjklmnpqrstvwxyz') v_w_count = {} for line in afile: nv = count_vowels(line) nc = count_conson(line) key = nv, nc v_w_count[key] = 1 + v_w_count.get(key, 0) top = max(v_w_count, key=v_w_count.get) nc = top[0] nv = top[1] n = v_w_count[top] print "top combination:", nv, "vowels,", nc, "consonants (occurs", n, "times)." if __name__ == '__main__': import sys main(sys.stdin) easily speeds things up by over 3.5 times: $ time python b.py </usr/share/dict/words top combination: 5 vowels, 3 consonants (occurs 14947 times). real 0m1.045s user 0m0.976s sys 0m0.061s and a similar speedup can be had in C (also by eliminating the use of REs), e.g.: #include <string.h> #include <stdio.h> char * vowels = "aeiouAEIOU"; char * conson = "bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ"; void count_letters(char* astr, int* pnv, int* pnc) { int c; int nv=0; int nc=0; while(c = *astr++) { if(strchr(vowels, c)) nv++; else if(strchr(conson, c)) nc++; } *pnv = nv; *pnc = nc; } int store_counts[100*100]; void add_count(int nc, int nv) { store_counts[nc + 100*nv]++; } int max_count(int* nc, int* nv) { int bestloc = 0; int topcoun = 0; int i; for(i=0; i<100*100; i++) { if (store_counts[i] > topcoun) { bestloc = i; topcoun = store_counts[i]; } } *nc = bestloc % 100; *nv = bestloc / 100; return topcoun; } int main() { char buffer[1000]; while (gets(buffer)) { int nv, nc; count_letters(buffer, &nv, &nc); add_count(nv, nc); } int pnv, pnc, maxc; maxc = max_count(&pnv, &pnc); printf("top combination: %d vowels, %d consonants (occurs %d times).\n", pnv, pnc, 
maxc); return 0; } None of these sources is "super-optimized" (in particular, I'm sure it's just as easy to make the Javascript 3-4 times faster as it was for Python and C, if I only knew Javascript better -- but even the faster Python and C programs could well be pushed further), but I think that exactly because of this factor they may be "representative" of typical uses of the languages (inasmuch as a tiny benchmark can ever be "representative", of course). Alex -- http://mail.python.org/mailman/listinfo/python-list