I recently tested the performance of BabuDB. I'm not very familiar with key-value stores, but I chose the following layout for the keys/values:
key: key|value|id value: id I chose this layout since there can be many nodes (ids) indexed for each given key/value pair. Now, I found the insertion performance to be almost twice as fast in lucene, (maybe because I know how to tweak lucene). However the lookup speed was at least twice as fast in babudb. The data size was 5M key/value pairs with a couple of different keys. Each key/value pair got indexed with a not very large number of different ids. I also tried with a couple of different keys, but it didn't affect performance significantly in any case. Numbers: Lucene: insertion: ~20s lookup: ~2ms/query BabuDb: insertion: ~38s (I tried groups of 100k-500k) lookup: ~0.9ms/query Are those numbers something you'd expect from BabuDB? Code: TestBabuDb: ----------------------------------------------------------------- package org.neo4j.index.babu; import java.io.File; import java.io.IOException; import java.util.Iterator; import java.util.Map.Entry; import org.xtreemfs.babudb.BabuDB; import org.xtreemfs.babudb.BabuDBFactory; import org.xtreemfs.babudb.config.BabuDBConfig; import org.xtreemfs.babudb.log.DiskLogger.SyncMode; import org.xtreemfs.babudb.lsmdb.BabuDBInsertGroup; import org.xtreemfs.babudb.lsmdb.Database; public class TestBabuDb { public static void main( String[] args ) throws Exception { deleteFileOrDirectory( new File( "babudb" ) ); BabuDB babuDb = BabuDBFactory.createBabuDB( new BabuDBConfig( "babudb/db", "babudb/log", 1, 1024*1024*10, 0, SyncMode.FSYNC, 0, 0, false, 0, 0 ) ); Database db = babuDb.getDatabaseManager().createDatabase( "test", 1 ); long t = System.currentTimeMillis(); BabuDBInsertGroup group = db.createInsertGroup(); for ( int i = 0; i < 5000000; i++ ) { group.addInsert( 0, key( i, "key", "value" + i%10000 ), fastToBytes( i ) ); if ( i % 500000 == 0 ) { db.insert( group, null ).get(); group = db.createInsertGroup(); System.out.print( "." 
); } } db.insert( group, null ).get(); System.out.println( "insert time (500k):" + (System.currentTimeMillis() - t) ); t = System.currentTimeMillis(); for ( int i = 0; i < 100; i++ ) { Iterator<Entry<byte[], byte[]>> entries = db.prefixLookup( 0, lookupKey( "key", "value" + i ), null ).get(); while ( entries.hasNext() ) { Entry<byte[], byte[]> entry = entries.next(); fastToLong( entry.getValue() ); } } System.out.println( "100 lookups:" + (System.currentTimeMillis() - t) ); db.shutdown(); babuDb.shutdown(); } private static byte[] fastToBytes( long value ) throws IOException { byte[] array = new byte[8]; for ( int i = 0; i < 8; i++ ) { array[7-i] = (byte) (value >>> (i * 8)); } return array; } private static long fastToLong( byte[] array ) throws IOException { long value = 0; for ( int i = 0; i < array.length; i++ ) { value <<= 8; value ^= (long) array[i] & 0xFF; } return value; } private static byte[] lookupKey( String key, Object value ) { return String.valueOf( key + "|" + value + "|" ).getBytes(); } private static byte[] key( long id, String key, Object value ) { return String.valueOf( key + "|" + value + "|" + id ).getBytes(); } public static void deleteFileOrDirectory( File file ) { if ( !file.exists() ) { return; } if ( file.isDirectory() ) { for ( File child : file.listFiles() ) { deleteFileOrDirectory( child ); } file.delete(); } else { file.delete(); } } } ----------------------------------------------------------------- TestLucene: ----------------------------------------------------------------- package org.neo4j.index; import java.io.File; import java.io.IOException; import org.apache.lucene.analysis.KeywordAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import 
org.apache.lucene.index.IndexWriter.MaxFieldLength; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; public class TestLucene { public static void main( String[] args ) throws IOException { File path = new File( "lcn" ); deleteFileOrDirectory( path ); Directory dir = FSDirectory.open( path ); IndexWriter writer = new IndexWriter( dir, new KeywordAnalyzer(), MaxFieldLength.UNLIMITED ); writer.setMaxBufferedDocs( 100000 ); long t = System.currentTimeMillis(); for ( int i = 0; i < 5000000; i++ ) { Document doc = new Document(); doc.add( new Field( "_id_", fastToBytes( i ), Store.YES ) ); doc.add( new Field( "key", "value" + i%10000, Store.NO, Index.NOT_ANALYZED ) ); writer.addDocument( doc ); if ( i % 100000 == 0 ) System.out.print( "." ); } writer.commit(); System.out.println( "insert:" + (System.currentTimeMillis() - t) ); IndexReader reader = writer.getReader(); IndexSearcher searcher = new IndexSearcher( reader ); t = System.currentTimeMillis(); for ( int i = 0; i < 100; i++ ) { Query query = new TermQuery( new Term( "key", "value" + i ) ); TopDocs docs = searcher.search( query, 100 ); for ( ScoreDoc scoreDoc : docs.scoreDocs ) { Document doc = searcher.doc( scoreDoc.doc ); fastToLong( doc.getBinaryValue( "_id_" ) ); } } System.out.println( "get:" + (System.currentTimeMillis() - t) ); } public static void deleteFileOrDirectory( File file ) { if ( !file.exists() ) { return; } if ( file.isDirectory() ) { for ( File child : file.listFiles() ) { deleteFileOrDirectory( child ); } file.delete(); } else { file.delete(); } } private static byte[] fastToBytes( long value ) throws IOException { byte[] array = new byte[8]; for ( int i = 0; i < 8; i++ ) { array[7-i] = (byte) (value >>> (i * 8)); } return array; } private static long 
fastToLong( byte[] array ) throws IOException { long value = 0; for ( int i = 0; i < array.length; i++ ) { value <<= 8; value ^= (long) array[i] & 0xFF; } return value; } } ----------------------------------------------------------------- 2010/4/29 Atle Prange <atle.pra...@gmail.com>: > I am using the event framework to auto-index nodes based on a special name i > give the properties: Whenever i want a property to be indexed, i append > :index=<name> to the property-name. On afterCommit() i check all changed > properties, and re-index the values that have changed. Really simple and > smooth, the framework makes it really easy. > > BTW.: I wrote a new IndexService using BabuDB [1]. BabuDB is really fast, > but does not work for fulltext query. For now the IndexService is embedded > in the Object-graph mapper i wrote [2]. Anyone is free to check it out, if > it looks interesting it could be moved to a neo4j subproject. > > -atle > > > [1] http://code.google.com/p/babudb/ > <http://code.google.com/p/babudb/>[2] http://ogrm.org > > > On Thu, Apr 29, 2010 at 2:08 PM, Atle Prange <atle.pra...@gmail.com> wrote: > >> Fixed, nice :) >> >> atle >> >> >> On Thu, Apr 29, 2010 at 12:16 PM, Mattias Persson < >> matt...@neotechnology.com> wrote: >> >>> Oo, wait a minute... I saw the problem. Try again with the latest code >>> (just >>> committed)! >>> >>> 2010/4/29 Atle Prange <atle.pra...@gmail.com> >>> >>> > Really nice, but it seems that afterCommit() never receives any data, >>> the >>> > TransactionData argument is always null.. >>> > >>> > On Tue, Apr 27, 2010 at 10:15 AM, Mattias Persson < >>> > matt...@neotechnology.com >>> > > wrote: >>> > >>> > > Hi everyone! >>> > > >>> > > I'm quite pleased to announce that the new event framework has been >>> > > committed in kernel 1.1-SNAPSHOT. After feedback from you guys the >>> > > framework >>> > > got quite small and cosy. Here's a summary. 
>>> > > >>> > > *TransactionEventHandler* >>> > > can be registered at a GraphDatabaseService and will thereafter >>> receive >>> > > diffs of modifications made in each transaction before and after they >>> are >>> > > committed. The transaction can still be modified in beforeCommit() and >>> > that >>> > > method can also throw an exception to prevent the transaction from >>> > > committing, causing a TransactionFailureException to be thrown from >>> the >>> > > code >>> > > which is committing the transaction. >>> > > >>> > > *KernelEventHandler* >>> > > can be registered at a GraphDatabaseService and will receive >>> > notifications >>> > > about when a shutdown is about to occur for the GraphDatabaseService >>> > > instance. It will also receive notifications about "kernel panics" >>> which >>> > is >>> > > a state which the kernel can come to from where it cannot continue >>> > without >>> > > needing to be restarted. An example of such an error would be a hard >>> > drive >>> > > breakdown or when no more space is left on the device the graph >>> database >>> > is >>> > > running on. >>> > > >>> > > The next step is to write an "auto indexer" for the IndexService so >>> that >>> > > you won't have to do the manual indexService.index( node, key, >>> value >>> > ) >>> > > anymore. Another thing would be to remove (deprecate) the >>> > > IndexService#shutdown() method as it no longer would be required. >>> > > >>> > > So it'd be great if you guys would try this out and tell us how it >>> feels. 
>>> > > >>> > > -- >>> > > Mattias Persson, [matt...@neotechnology.com] >>> > > Hacker, Neo Technology >>> > > www.neotechnology.com >>> > > _______________________________________________ >>> > > Neo mailing list >>> > > User@lists.neo4j.org >>> > > https://lists.neo4j.org/mailman/listinfo/user >>> > > >>> > _______________________________________________ >>> > Neo mailing list >>> > User@lists.neo4j.org >>> > https://lists.neo4j.org/mailman/listinfo/user >>> > >>> >>> >>> >>> -- >>> Mattias Persson, [matt...@neotechnology.com] >>> Hacker, Neo Technology >>> www.neotechnology.com >>> _______________________________________________ >>> Neo mailing list >>> User@lists.neo4j.org >>> https://lists.neo4j.org/mailman/listinfo/user >>> >> >> > _______________________________________________ > Neo mailing list > User@lists.neo4j.org > https://lists.neo4j.org/mailman/listinfo/user > -- Mattias Persson, [matt...@neotechnology.com] Hacker, Neo Technology www.neotechnology.com _______________________________________________ Neo4j mailing list User@lists.neo4j.org https://lists.neo4j.org/mailman/listinfo/user