Hi there,

I am trying to do some calculations on a single HDF5 file in parallel with 
`pmap`, but I am having a hard time making sense of my timing results. My 
understanding was that multiple processes can read from the same file at the 
same time when it is opened in read mode. I wanted to test this, and to see 
whether I could speed up my calculations by running them in parallel. I see 
only minor differences in timing, and I wonder whether this is reasonable. 
If these results are to be expected, is there anything I can do to speed up 
my calculations?

Consider the following files:

myparallel.jl
```
module MyParallel
using HDF5

export LETTERS, write_junk_singlefile, write_junk_multifiles,
    calc_singlefile, calc_singlefile_parallel,
    calc_multifiles, calc_multifiles_parallel

# Name of the combined file; also used as the suffix of every per-letter file.
const FILENAME = "junk.h5"
# Dataset names. One dataset is written (and later averaged) per entry.
const LETTERS = [
    "A", "B", "C", "D", "E", "F", "G", "H",
    "I", "J", "K", "L", "M", "N", "O", "P",
]
# Number of random values stored in each dataset.
const N = 100_000_000

# Path of the stand-alone file that holds the dataset called `name`.
multifile_path(name) = string(name, "_", FILENAME)

"""
    write_junk_singlefile()

Open `FILENAME` in write mode and store one dataset of `N` random values for every
entry of `LETTERS`, all inside that single file.
"""
function write_junk_singlefile()
    h5open(FILENAME, "w") do file
        for name in LETTERS
            write(file, name, rand(N))
        end
    end
end

"""
    write_junk_multifiles()

Write one file per entry of `LETTERS` (named `<letter>_<FILENAME>`), each holding a
single dataset of `N` random values stored under the letter itself.
"""
function write_junk_multifiles()
    for name in LETTERS
        h5open(multifile_path(name), "w") do file
            write(file, name, rand(N))
        end
    end
end

"""
    calc_singlefile(obj::String)
    calc_singlefile(objs::Vector{String})

Read dataset `obj` in full from the combined file `FILENAME` (opened read-only) and
return its mean. The vector method does this for each name in turn, on the calling
process, and returns the collected means.
"""
function calc_singlefile(obj::String)
    h5open(FILENAME, "r") do file
        values = file[obj][:]
        mean(values)
    end
end
calc_singlefile(objs::Vector{String}) = map(calc_singlefile, objs)

"""
    calc_singlefile_parallel(objs::Vector{String})

Same computation as `calc_singlefile(objs)`, but the per-dataset calls are handed
to `pmap`.
"""
calc_singlefile_parallel(objs::Vector{String}) = pmap(calc_singlefile, objs)

"""
    calc_multifiles(obj::String)
    calc_multifiles(objs::Vector{String})

Read dataset `obj` in full from its own file `<obj>_<FILENAME>` (opened read-only)
and return its mean. The vector method does this for each name in turn, on the
calling process, and returns the collected means.
"""
function calc_multifiles(obj::String)
    h5open(multifile_path(obj), "r") do file
        values = file[obj][:]
        mean(values)
    end
end
calc_multifiles(objs::Vector{String}) = map(calc_multifiles, objs)

"""
    calc_multifiles_parallel(objs::Vector{String})

Same computation as `calc_multifiles(objs)`, but the per-dataset calls are handed
to `pmap`.
"""
calc_multifiles_parallel(objs::Vector{String}) = pmap(calc_multifiles, objs)

end
```

and 

run_parallel_junk_calc.jl
```
# Driver script: generates the test data, then times the four read-and-average
# variants exported by MyParallel (single file vs. one file per dataset, serial
# vs. `pmap`).

# Start 7 additional worker processes.
addprocs(7)
# Evaluate the module file on every process, then bring its exports into scope
# on every process so the workers can run the functions handed to `pmap`.
@everywhere include("myparallel.jl")
@everywhere using MyParallel

# Create the input data. These calls are not wrapped in `@everywhere`, so they
# run once, on the calling process only.
write_junk_singlefile()
write_junk_multifiles()

# Each variant is timed exactly once.
# NOTE(review): every `@time` below measures the first call of that function, so
# the figures presumably include compilation time as well as I/O; confirm by
# timing a second run.
println("singlefile single core processing:")
@time calc_singlefile(LETTERS)
println("multifiles single core processing:")
@time calc_multifiles(LETTERS)
println("singlefile multi core processing:")
@time calc_singlefile_parallel(LETTERS)
println("multifiles multi core processing:")
@time calc_multifiles_parallel(LETTERS)
```

When running `julia run_parallel_junk_calc.jl`, I get the following:

WARNING: replacing module HDF5 (7 times)

and then,

singlefile single core processing:
 13.122327 seconds (643.51 k allocations: 11.948 GB, 8.58% gc time)
multifiles single core processing:
 12.176089 seconds (26.93 k allocations: 11.922 GB, 8.26% gc time)
singlefile multi core processing:
 15.712127 seconds (4.15 M allocations: 175.925 MB, 1.06% gc time)
multifiles multi core processing:
 11.240618 seconds (753.98 k allocations: 31.415 MB, 0.08% gc time)

Is there any way I can improve this behaviour?

Best
Jon Alm Eriksen

Reply via email to