Hi,
I have a GraphDB with the following attributes:
Number of nodes: 3.6M
Number of relation types: 2 
Total size of DB: 9GB
    lucene : 160MB
    neostore.nodestore.db : 31MB
    neostore.propertystore.db : 2GB
    neostore.propertystore.db.strings : 4GB
    neostore.relationshipstore.db : 1.5GB

Machine characteristics:
    vm.dirty_background_ratio = 50
    vm.dirty_ratio = 80
    OS: Ubuntu x64
    CPU: Corei7
    MEM: 12GB

The following is our running scenario (The source code is attached):
1. Iterate over all nodes and extract a list of node IDs ("fillNodes" function).
2. For each node ID, start a worker thread that processes the following items 
(8 threads are executed in parallel using a pool - "walk" function):
    -extract relationships of this node.
    -perform a light processing.
    -update results (in a ConcurrentHashMap).

Note that:
    -The above scenario is iterative. Roughly it runs 10 times.
    -No update is applied to the DB during running (read only).

After running the application:
    -Less than 4GB of the 12GB of memory is occupied. It seems that Neo4j uses 
only 2GB of memory.
    -The hard disk is overloaded.
    -On average, less than 20% of the 8 cores is utilized. 

Some documents about performance are available in the wiki (Performance 
Guide, Configuration Settings, Linux Performance Guide), but they are quite 
general.
Could you please advise me on how to configure memory mapping better and speed 
up my application?
I can benchmark different configurations and report the results in the wiki 
for future users.
Kind regards,
Amir



      
package wikipedia.wnng;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.TimeUnit;

import org.neo4j.graphdb.Direction;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.RelationshipType;
import org.neo4j.graphdb.Transaction;
import org.neo4j.index.IndexService;

public class RandomWalker {

        public enum NodeRelationshipTypes implements RelationshipType {
                go_to, related_to
        }

        class WalkerThread extends Thread {

                private String cui;

                public WalkerThread(String str) {
                        super(str);
                        cui = str;
                }

                public void run() {
                        // create current row of transition matrix
                        Map<String, Double> Pi = new HashMap<String, Double>();
                        Transaction tx = graphDb.beginTx();
                        try {
                                Node current_node = index.getSingleNode("cui", 
cui);
                                for (Iterator<Relationship> itr = 
current_node.getRelationships(NodeRelationshipTypes.go_to,
                                                Direction.INCOMING).iterator(); 
itr.hasNext();) {
                                        Relationship rel = itr.next();
                                        Node income_node = rel.getStartNode();
                                        
Pi.put(income_node.getProperty("cui").toString(), 
Double.valueOf(rel.getProperty("weight")
                                                        .toString()));
                                }
                                tx.success();
                        } finally {
                                tx.finish();
                        }

                        // calculate v2 = alpha * Pi * v1 + (1 - alpha) * v1
                        // Pi * v1
                        Double result = new Double(0.0);
                        for (String index : Pi.keySet()) {
                                if (v1.containsKey(index))
                                        result += Pi.get(index) * v1.get(index);
                        }
                        // alpha * Pi * v1
                        result *= alpha;
                        // (1 - alpha) * v1
                        Double temp = v1.get(cui);
                        temp = (temp == null) ? 0.0 : (1 - alpha) * temp;

                        // result = alpha * Pi * v1 + (1 - alpha) * v1[cui]
                        result += temp;

                        // update v2 with new value
                        v2.put(cui, result);
                }
        }

        private HashSet<String> nodes;

        private final double alpha = 0.5;
        private final double epsilon = 0.1;

        private ConcurrentMap<String, Double> v1;
        private ConcurrentMap<String, Double> v2;

        private final GraphDatabaseService graphDb;
        private final IndexService index;

        public RandomWalker() {

                v1 = new ConcurrentHashMap<String, Double>(); // feature vector 
1
                v2 = new ConcurrentHashMap<String, Double>(); // feature vector 
2

                // get singleton DatabaseService and Index
                graphDb = 
GraphDatabaseServiceManager.getInstance().getGraphDatabaseService();
                index = 
GraphDatabaseServiceManager.getInstance().getIndexService();

                fillNodes();
        }

        /**
         * Create a list of all nodes (cuis) for follow up processing.
         */
        private void fillNodes() {
                System.out.println("Iterating over all nodes and adding to the 
'nodes'");
                nodes = new HashSet<String>();
                Transaction tx = graphDb.beginTx();
                try {
                        for (Iterator<Node> itr = 
graphDb.getAllNodes().iterator(); itr.hasNext();) {
                                Node current_node = itr.next();
                                if 
(current_node.equals(graphDb.getReferenceNode()))
                                        continue;
                                String current_cui = 
current_node.getProperty("cui").toString();
                                nodes.add(current_cui);
                        }
                        tx.success();
                } finally {
                        tx.finish();
                }
                System.out.println("Number of nodes: " + nodes.size());
        }

        /**
         * get a list of seed concepts and start walking in concurrent threads. 
Finally return the enriched feature vector.
         * 
         * @param seed_concepts
         * @return
         */
        public HashMap<String, Double> walk(HashMap<String, Double> 
seed_concepts) {
                v1.clear();
                v2.clear();

                v1.putAll(seed_concepts);

                do {
                        // replace current feature vector(v1) with the new 
processed vector(v2)
                        if (!v2.isEmpty()) {
                                v1.clear();
                                v1.putAll(v2);
                                v2.clear();
                        }
                        NotifyingBlockingThreadPoolExecutor threadPoolExecutor 
= new NotifyingBlockingThreadPoolExecutor(8, 16, 90,
                                        TimeUnit.SECONDS);

                        for (Iterator<String> itr = nodes.iterator(); 
itr.hasNext();) {
                                String cui = itr.next();
                                threadPoolExecutor.execute(new 
WalkerThread(cui));
                        }

                        try {
                                threadPoolExecutor.await();
                        } catch (InterruptedException e) {
                                // Handle error
                        }

                        System.out.println("One iteration of a Ramdom Walk is 
done.");
                } while (!isConverge());

                HashMap<String, Double> v_star = new HashMap<String, Double>();
                v_star.putAll(v2);
                
                return v_star;
        }

        private double getDifference() {
                double diff_value = 0.0;
                for (String item : v1.keySet()) {
                        if (v2.containsKey(item))
                                diff_value += Math.pow(v1.get(item) - 
v2.get(item), 2);
                        else
                                diff_value += Math.pow(v1.get(item), 2);
                }
                for (String item : v2.keySet()) {
                        if (!v1.containsKey(item))
                                diff_value += Math.pow(v2.get(item), 2);
                }
                return Math.sqrt(diff_value);
        }

        private boolean isConverge() {
                System.out.println("Checking for convergence:");
                double diff = getDifference();
                System.out.println("\tdifference: " + diff);
                return (diff > epsilon) ? false : true;
        }

        public void shutdown() {
                index.shutdown();
                graphDb.shutdown();
        }

        public static void main(String[] args) {
                RandomWalker w = new RandomWalker();
                w.walk(new HashMap<String, Double>());
                System.out.println("END");
                return;
        }
}
_______________________________________________
Neo4j mailing list
[email protected]
https://lists.neo4j.org/mailman/listinfo/user

Reply via email to