I just revisited this with a recent version of Julia plus minor code tweaks.
The resulting Julia times are now competitive with Python.

Python wall times: 0.416s, 0.389s, 0.389s, 0.389s
Julia elapsed times: 0.409s, 0.396s, 0.358s, 0.399s

Python
import re
from collections import Counter
 
fn = "data\\pg2600.txt" 
%time c = Counter(re.split('[ \n\r\t-.,:_";!]', open(fn).read()))
%time c = Counter(re.split('[ \n\r\t-.,:_";!]', open(fn).read()))
%time c = Counter(re.split('[ \n\r\t-.,:_";!]', open(fn).read()))
%time c = Counter(re.split('[ \n\r\t-.,:_";!]', open(fn).read()))

Julia: (breaking out the todict() function made a noticeable difference)
"""
    todict(words; capacity::Integer=100_000)

Count how many times each element of `words` occurs.

Returns a `Dict` keyed by `eltype(words)` whose values are `Int` occurrence
counts. `capacity` is the number of slots pre-reserved in the dictionary so
it does not have to be regrown repeatedly while it fills.

Kept as a separate function on purpose: the surrounding post notes that
breaking this loop out of `count_words` made a noticeable difference in
run time.
"""
function todict(words; capacity::Integer=100_000)
    # Key type follows the input instead of a hard-coded string type, so the
    # function works for whatever `split` (or any other caller) hands over.
    counts = Dict{eltype(words),Int}()
    sizehint!(counts, capacity)
    for w in words
        counts[w] = get(counts, w, 0) + 1
    end
    return counts
end

"""
    count_words(fn::AbstractString)

Read the file named `fn` and return a `Dict` mapping each word to the number
of times it occurs.

Words are the non-empty pieces left after splitting the file contents on any
of these characters: space, newline, carriage return, tab, hyphen, period,
comma, colon, underscore, double quote, semicolon and exclamation mark.
"""
function count_words(fn::AbstractString)
    s = read(fn, String)
    spl = Set([' ', '\n', '\r', '\t', '-', '.', ',', ':', '_', '"', ';', '!'])
    # keepempty=false drops the empty strings produced by adjacent delimiters.
    words = split(s, spl; keepempty=false)
    return todict(words)
end


# Benchmark input. joinpath yields the same "data\\pg2600.txt" on Windows
# and a valid path on other platforms.
fn = joinpath("data", "pg2600.txt")

# Time four consecutive runs; the first call also includes compilation.
@time c = count_words(fn)
@time c = count_words(fn)
@time c = count_words(fn)
@time c = count_words(fn)


On Tuesday, March 4, 2014 3:15:21 AM UTC-5, Roman Sinayev wrote:
>
> Why is Julia 2x slower than Python on this test?
>
> https://gist.github.com/lqdc/9342237
>

Reply via email to