I tried the built-in profiler, and find that the problem lies in lines I end with ******, the result is shown below: that proved my guess, how can I pre-allocate these arrays? If I don't want to divide this code into several parts that calculate these arrays separately.
| lines | backtrace | | 169 | 9011 | *********** | 173 | 1552 | | 175 | 2604 | | 179 | 2906 | | 181 | 1541 | | 192 | 4458 | | 211 | 13332 ************| | 214 | 8431 |************ | 218 | 15871 |*********** | 221 | 2538 | 在 2016年3月29日星期二 UTC+8下午9:27:27,Stefan Karpinski写道: > > Have you tried: > > (a) calling @code_typewarn on your function > (b) using the built-in profiler? > > > On Tue, Mar 29, 2016 at 9:23 AM, 博陈 <chenph...@gmail.com <javascript:>> > wrote: > >> First of all, have a look at the result. >> >> >> <https://lh3.googleusercontent.com/-anNt-E4P1vM/Vvp-TybegZI/AAAAAAAAABE/ZvDO2xarndMSgKVOXy_hcPd5NTh-7QcEA/s1600/QQ%25E5%259B%25BE%25E7%2589%258720160329210732.png> >> >> >> >> >> >> >> >> >> My code calculates the evolution of 1-d 2-electron system in the electric >> field, some variables are calculated during the evolution. >> According to the result of @time evolution, my code must have a >> pre-allocation problem. Before you see the long code, i suggest that the >> hotspot might be around the Arrays prop_e, \phio, pp. I have learnt that I >> can use m = Array(Float64, 1) outside a "for" loop and empty!(m) and >> push!(m, new_m) inside the loop to pre-allocate the variable m, but in my >> situations, I don't know how to pre-allocate these arrays. >> >> Below is the script (precisely, the main function) itself. >> >> function evolution(ϕ::Array{Complex{Float64}, 2}, >> ele::Array{Float64, 1}, dx::Float64, dt::Float64, >> flags::Tuple{Int64, Int64, Int64, Int64}) >> ϕg = copy(ϕ) >> FFTW.set_num_threads(8) >> ns = length( ϕ[:, 1] ) >> x = get_x(ns, dx) >> p = get_p(ns, dx) >> if flags[4] == 1 >> pp = similar(p) >> A = -cumsum(ele) * dt >> A² = A.*A >> ##### splitting >> r_sp = 150.0 >> δ_sp = 5.0 >> splitter = Array(Float64, ns, ns) >> end >> nt = length( ele ) >> >> # ##### Pre-allocate result and temporary arrays >> #if flags[1] == 1 >> σ = zeros(Complex128, nt) >> #end >> #if flags[2] == 1 >> a = zeros(Float64, nt) >> #end >> #if flags[3] == 1 >> r_ionization = 20.0 >> n1 = round(Int, ns/2 - r_ionization/dx) >> n2 = round(Int, ns/2 + r_ionization/dx) >> ip = zeros(Float64, nt) >> #end >> >> ##### FFT plan >> p_fft! = plan_fft!( similar(ϕ), flags=FFTW.MEASURE ) >> >> prop_x = similar( ϕ ) >> prop_p = similar( prop_x ) >> prop_e = similar( prop_x ) >> # this two versions just cost the same time >> xplusy = Array(Float64, ns, ns) >> #xplusy = Array( Float64, ns^2) >> >> ##### absorb boundary >> r_a = ns * dx /2 - 50.0 >> δ = 10.0 >> absorb = Array(Float64, ns, ns) >> >> k0 = 2π / (ns * dx) >> >> @inbounds for j in 1:ns >> @inbounds for i in 1:ns >> prop_x[i, j] = exp( -im * get_potential(x[i], x[j]) * dt / 2 ) >> prop_p[i, j] = exp( -im * (p[i]^2 + p[j]^2)/2 * dt ) >> >> xplusy[i, j] = x[i] + x[j] >> >> absorb[i, j] = (1.0 - get_out(x[i], r_a, δ ))* (1.0 - >> get_out(x[j], >> r_a, δ)) >> end >> end >> >> if flags[2] == 1 >> pvpx = Array(Float64, ns, ns) >> @inbounds for j in 1:ns >> @inbounds for i in 1:ns >> pvpx[i, j] = get_pvpx(x[i], x[j]) >> end >> end >> end >> >> if flags[4] == 1 >> ϕo = zeros(Complex128, ns, ns) >> ϕp = zeros(Complex128, ns, ns) >> @inbounds for j in 1:ns >> @inbounds for i in 1:ns >> splitter[i, j] = get_out(x[i], r_sp, δ_sp) * >> get_out(x[j], r_sp, δ_sp) >> end >> end >> end >> >> for i in 1:nt >> for j in eachindex(ϕ) >> prop_e[j] = exp( -im * ele[i] * xplusy[j] * dt/2.0) >> ************************************169 >> end >> >> for j in eachindex(ϕ) >> ϕ[j] *= prop_x[j] * prop_e[j] >> end >> p_fft! * ϕ >> for j in eachindex(ϕ) >> ϕ[j] *= prop_p[j] >> end >> p_fft! \ ϕ >> for j in eachindex(ϕ) >> ϕ[j] *= prop_x[j] * prop_e[j] >> end >> ########## autocorrelation function σ(t) >> if flags[1] == 1 >> for j in eachindex(ϕ) >> σ[i] += conj(ϕg[j]) * ϕ[j] >> end >> end >> ########## dipole acceleration a(t) >> if flags[2] == 1 >> for j in eachindex(ϕ) >> a[i] += abs(ϕ[j])^2 * (pvpx[j] + 2ele[i]) >> end >> end >> ########## ionization probability ip(t) >> if flags[3] == 1 >> for j1 in n1:n2 >> for j2 in 1:ns >> ip[i] += abs( ϕ[j2+ns*(j1-1)] )^2 >> end >> end >> for j1 in [1:n1-1; n2+1:ns] >> for j2 in n1:n2 >> ip[i] += abs( ϕ[j2+ns*(j1-1)] )^2 >> end >> end >> end >> ########## get momentum >> if flags[4] == 1 >> for j in eachindex(ϕo) >> ϕo[j] = ϕ[j] * splitter[j] * exp( -im * A[i]*xplusy[j] ) >> **********************************211 >> end >> for j in eachindex(p) >> pp[j] = p[j]^2 /2 * (nt-i) - p[j] *sum( A[i:nt] ) + sum( >> A²[1:nt] ) /2 ******************214 >> end >> for j2 in 1:ns >> for j1 in 1:ns >> ϕo[j1, j2] = ϕo[j1, j2] * exp( -im * (pp[j1] + >> pp[j2]) * dt)************************218 >> end >> end >> p_fft! * ϕo >> for j in eachindex(ϕp) >> ϕp[j] += ϕo[j] >> end >> end >> >> ########## absorb boundary >> if mod(i, 300) == 0 >> for j in eachindex(ϕ) >> ϕ[j] *= absorb[j] >> end >> end >> >> if (mod(i, 500) == 0) >> println("i = $i") >> flush(STDOUT) >> end >> end >> σ *= dx^2 >> a *= dx^2 >> ip *= dx^2 >> >> save("data/fs.jld", "ϕ", ϕ) >> if flags[1] == 1 >> save("data/sigma.jld", "σ", σ) >> end >> if flags[2] == 1 >> save("data/a.jld", "a", a) >> end >> if flags[3] == 1 >> save("data/ip.jld", "ip", ip) >> end >> if flags[4] == 1 >> save("data/pf.jld", "ϕp", ϕp) >> end >> >> #return σ, a, ip, ϕ >> nothing >> end >> >> >> >