Hello Rémi,

While I'm not sure this is the (only) problem, in our setup we also give mpirun the machines file:

setenv WIEN_MPIRUN "mpirun  -np _NP_ -machinefile _HOSTS_ _EXEC_"

which I generate, for a setup with one k-point per node, using the following Python script:

/wienhybrid
#!/usr/bin/env python
#Machines file generator for WIEN2k
#May 13th 2013
#
#Michael Sluydts
#Center for Molecular Modeling
#Ghent University
from collections import Counter
import subprocess, os
def read_node_counts(nodefile_path):
    """Count how many times each node appears in a node file.

    Each non-blank line is taken as one core on the named host. Only the
    part of the host name before the first '.' is kept, and surrounding
    whitespace (including the line ending) is removed first so that host
    names without a domain do not keep a trailing newline.

    Returns a Counter mapping short node name -> number of lines seen.
    """
    counts = Counter()
    with open(nodefile_path, 'r') as handle:
        for line in handle:
            host = line.strip()
            if host:
                counts[host.split('.')[0]] += 1
    return counts


def count_irreducible_kpoints(klist_path):
    """Return the number of newline characters in klist_path minus 2.

    This mirrors the previous `wc -l < file` minus 2 computation without
    going through a shell.
    NOTE(review): the 2 presumably accounts for non-k-point lines at the
    end of the klist file - confirm against your case.klist.
    """
    with open(klist_path, 'r') as handle:
        return handle.read().count('\n') - 2


def assign_nodes(corecount, ibz):
    """Split the nodes into main nodes and leftover nodes.

    Nodes are visited from most to fewest cores. The first `ibz` nodes
    each become the main node of one k-point line. Every remaining node
    is attached to the main node whose group currently has the fewest
    cores.

    Returns (alloc, nodemap):
      alloc   - Counter of main node -> its own core count
      nodemap - list of (leftover node, main node it was attached to)

    Raises ValueError if there are no nodes or if ibz is smaller than 1.
    """
    if not corecount:
        raise ValueError('the node file does not list any nodes')
    if ibz < 1:
        raise ValueError('need at least one k-point, got %d' % ibz)

    alloc = Counter()
    total = Counter()
    nodemap = []
    for node, cores in corecount.most_common():
        if len(alloc) < ibz:
            alloc[node] += cores
            total[node] += cores
        else:
            # last entry of most_common() is the group with the fewest cores
            lowcore = total.most_common()[-1][0]
            total[lowcore] += cores
            nodemap.append((node, lowcore))
    return alloc, nodemap


def build_machines_text(corecount, ibz):
    """Build the contents of the .machines file as a single string.

    corecount - Counter of node name -> number of cores
    ibz       - number of k-points to create lines for

    Raises ValueError (from assign_nodes) on empty corecount or ibz < 1.
    """
    alloc, nodemap = assign_nodes(corecount, ibz)

    # lapw0 is given the first node in corecount with all of that node's cores
    first = next(iter(corecount))
    lines = ['lapw0: ' + first + ':' + str(corecount[first])]

    for node in alloc:
        # main node of this k-point line, with its core count
        entry = '1:' + node + ':' + str(alloc[node])
        # leftover nodes attached to this main node: the node name is
        # repeated once per core, so a 1-core node still gets one entry
        for ext, owner in nodemap:
            if owner == node:
                entry += (' ' + ext) * corecount[ext]
        lines.append(entry)

    # If your nodes do not all have the same specifications you may have to
    # change the weights (the leading "1:") above and the granularity below.
    # If you use a residue machine you should remove extrafine and add the
    # residue configuration.
    lines.append('granularity:1')
    lines.append('extrafine:1')

    # If you have memory issues or limited bandwidth between nodes, try
    # adding the following line (you can always try it and see if it
    # speeds things up):
    # lines.append('lapw2 vector split:2')

    return '\n'.join(lines) + '\n'


def main():
    """Read $PBS_NODEFILE and <cwd name>.klist, then write ./.machines."""
    nodefile_path = os.environ.get('PBS_NODEFILE', '').strip()
    if not nodefile_path:
        raise SystemExit('PBS_NODEFILE is not set; run this inside a PBS job')

    corecount = read_node_counts(nodefile_path)

    # the klist file is named after the current working directory
    klist_path = os.path.basename(os.getcwd()) + '.klist'
    ibz = count_irreducible_kpoints(klist_path)

    with open('.machines', 'w') as out:
        out.write(build_machines_text(corecount, ibz))


if __name__ == '__main__':
    main()



_______________________________________________
Wien mailing list
Wien@zeus.theochem.tuwien.ac.at
http://zeus.theochem.tuwien.ac.at/mailman/listinfo/wien
SEARCH the MAILING-LIST at:  
http://www.mail-archive.com/wien@zeus.theochem.tuwien.ac.at/index.html

Reply via email to