2009/9/18 <matt...@lists.neo4j.org>: > Author: mattias > Date: 2009-09-18 13:14:47 +0200 (Fri, 18 Sep 2009) > New Revision: 3108 > > Modified: > > components/index-util/trunk/src/main/java/org/neo4j/util/index/LuceneDataSource.java > > components/index-util/trunk/src/main/java/org/neo4j/util/index/LuceneTransaction.java > Log: > Made a few changes which affect performance in the following ways: > > + Index lookups (getNodes(), getSingleNode()) scale better with > larger indices because of the use of IndexReader#reopen() instead > of closing it and opening it from scratch. > + Better performance when doing mixed inserts/removes for the > same index. > - Slightly slower performance when only doing a lot of removes > from an index. > > I think the plus statements weigh more than the minus statement since > those are more real-life scenarios. > > See r202 > I meant #202 > > Modified: > components/index-util/trunk/src/main/java/org/neo4j/util/index/LuceneDataSource.java > =================================================================== > --- > components/index-util/trunk/src/main/java/org/neo4j/util/index/LuceneDataSource.java > 2009-09-18 08:15:00 UTC (rev 3107) > +++ > components/index-util/trunk/src/main/java/org/neo4j/util/index/LuceneDataSource.java > 2009-09-18 11:14:47 UTC (rev 3108) > @@ -37,13 +37,14 @@ > import org.apache.lucene.document.Field; > import org.apache.lucene.document.Field.Index; > import org.apache.lucene.index.CorruptIndexException; > +import org.apache.lucene.index.IndexReader; > import org.apache.lucene.index.IndexWriter; > import org.apache.lucene.index.Term; > import org.apache.lucene.index.IndexWriter.MaxFieldLength; > -import org.apache.lucene.search.Hits; > +import org.apache.lucene.search.BooleanQuery; > import org.apache.lucene.search.IndexSearcher; > -import org.apache.lucene.search.Query; > import org.apache.lucene.search.TermQuery; > +import org.apache.lucene.search.BooleanClause.Occur; > import org.apache.lucene.store.Directory; > 
import org.apache.lucene.store.FSDirectory; > import org.neo4j.impl.cache.LruCache; > @@ -249,28 +250,67 @@ > lock.writeLock().unlock(); > } > > + /** > + * If nothing has changed underneath (since the searcher was last created > + * or refreshed) {@code null} is returned. But if something has > changed a > + * refreshed searcher is returned. It makes use of the > + * {@link IndexReader#reopen()} which is faster than opening an index > from > + * scratch. > + * > + * @param searcher the {@link IndexSearcher} to refresh. > + * @return a refreshed version of the searcher or, if nothing has > changed, > + * {@code null}. > + * @throws IOException if there's a problem with the index. > + */ > + private IndexSearcher refreshSearcher( IndexSearcher searcher ) > + { > + try > + { > + IndexReader reopened = searcher.getIndexReader().reopen(); > + if ( reopened != null ) > + { > + return new IndexSearcher( reopened ); > + } > + return null; > + } > + catch ( IOException e ) > + { > + throw new RuntimeException( e ); > + } > + } > + > + private Directory getDirectory( String key ) throws IOException > + { > + return FSDirectory.getDirectory( new File( storeDir, key ) ); > + } > + > + /** > + * @param key the key for the index, i.e. which index to return a > searcher > + * for > + * @return an {@link IndexSearcher} for the index for {@key}. If > no such > + * searcher has been opened before it is opened here. 
> + */ > IndexSearcher getIndexSearcher( String key ) > { > - IndexSearcher searcher = indexSearchers.get( key ); > - if ( searcher == null ) > + try > { > - try > + IndexSearcher searcher = indexSearchers.get( key ); > + if ( searcher == null ) > { > - Directory dir = FSDirectory.getDirectory( > - new File( storeDir + "/" + key ) ); > + Directory dir = getDirectory( key ); > if ( dir.list().length == 0 ) > { > return null; > } > searcher = new IndexSearcher( dir ); > + indexSearchers.put( key, searcher ); > } > - catch ( IOException e ) > - { > - throw new RuntimeException( e ); > - } > - indexSearchers.put( key, searcher ); > + return searcher; > } > - return searcher; > + catch ( IOException e ) > + { > + throw new RuntimeException( e ); > + } > } > > public XaTransaction createTransaction( int identifier, > @@ -279,20 +319,16 @@ > return new LuceneTransaction( identifier, logicalLog, this ); > } > > - void removeIndexSearcher( String key ) > + void invalidateIndexSearcher( String key ) > { > - IndexSearcher searcher = indexSearchers.remove( key ); > + IndexSearcher searcher = indexSearchers.get( key ); > if ( searcher != null ) > { > - try > + IndexSearcher refreshedSearcher = refreshSearcher( searcher ); > + if ( refreshedSearcher != null ) > { > - searcher.close(); > + indexSearchers.put( key, refreshedSearcher ); > } > - catch ( IOException e ) > - { > - throw new RuntimeException( > - "Unable to close index searcher[" + key + "]", e ); > - } > } > } > > @@ -300,8 +336,7 @@ > { > try > { > - Directory dir = FSDirectory.getDirectory( > - new File( storeDir + "/" + key ) ); > + Directory dir = getDirectory( key ); > return new IndexWriter( dir, getAnalyzer(), > MaxFieldLength.UNLIMITED ); > } > @@ -310,35 +345,23 @@ > throw new RuntimeException( e ); > } > } > - > - protected void deleteDocumentUsingReader( IndexSearcher searcher, > + > + protected void deleteDocumentsUsingWriter( IndexWriter writer, > long nodeId, Object value ) > { > - if ( searcher == null 
) > - { > - return; > - } > - Query query = new TermQuery( new Term( getDeleteDocumentsKey(), > - value.toString() ) ); > try > { > - Hits hits = searcher.search( query ); > - for ( int i = 0; i < hits.length(); i++ ) > - { > - Document document = hits.doc( i ); > - int foundId = Integer.parseInt( document.getField( > - LuceneIndexService.DOC_ID_KEY ).stringValue() ); > - if ( nodeId == foundId ) > - { > - int docNum = hits.id( i ); > - searcher.getIndexReader().deleteDocument( docNum ); > - } > - } > + BooleanQuery query = new BooleanQuery(); > + query.add( new TermQuery( new Term( getDeleteDocumentsKey(), > + value.toString() ) ), Occur.MUST ); > + query.add( new TermQuery( new Term( > LuceneIndexService.DOC_ID_KEY, > + "" + nodeId ) ), Occur.MUST ); > + writer.deleteDocuments( query ); > } > catch ( IOException e ) > { > throw new RuntimeException( "Unable to delete for " + nodeId + "," > - + "," + value + " using" + searcher, e ); > + + "," + value + " using" + writer, e ); > } > } > > > Modified: > components/index-util/trunk/src/main/java/org/neo4j/util/index/LuceneTransaction.java > =================================================================== > --- > components/index-util/trunk/src/main/java/org/neo4j/util/index/LuceneTransaction.java > 2009-09-18 08:15:00 UTC (rev 3107) > +++ > components/index-util/trunk/src/main/java/org/neo4j/util/index/LuceneTransaction.java > 2009-09-18 11:14:47 UTC (rev 3108) > @@ -30,7 +30,6 @@ > > import org.apache.lucene.document.Document; > import org.apache.lucene.index.IndexWriter; > -import org.apache.lucene.search.IndexSearcher; > import org.neo4j.api.core.Node; > import org.neo4j.impl.transaction.xaframework.XaCommand; > import org.neo4j.impl.transaction.xaframework.XaLogicalLog; > @@ -47,10 +46,8 @@ > > private final LuceneDataSource luceneDs; > > - private final Map<String,List<AddCommand>> addCommandMap = > - new HashMap<String,List<AddCommand>>(); > - private final Map<String,List<RemoveCommand>> removeCommandMap = 
> - new HashMap<String,List<RemoveCommand>>(); > + private final Map<String,List<LuceneCommand>> commandMap = > + new HashMap<String,List<LuceneCommand>>(); > > LuceneTransaction( int identifier, XaLogicalLog xaLog, > LuceneDataSource luceneDs ) > @@ -156,34 +153,15 @@ > @Override > protected void doAddCommand( XaCommand command ) > { > - // command added either through addCommand or injectCommand > - if ( command instanceof AddCommand ) > + LuceneCommand luceneCommand = ( LuceneCommand ) command; > + String key = luceneCommand.getKey(); > + List<LuceneCommand> list = commandMap.get( key ); > + if ( list == null ) > { > - AddCommand addCommand = (AddCommand) command; > - List<AddCommand> list = addCommandMap.get( addCommand.getKey() ); > - if ( list == null ) > - { > - list = new ArrayList<AddCommand>(); > - addCommandMap.put( addCommand.getKey(), list ); > - } > - list.add( addCommand ); > + list = new ArrayList<LuceneCommand>(); > + commandMap.put( key, list ); > } > - else if ( command instanceof RemoveCommand ) > - { > - RemoveCommand removeCommand = (RemoveCommand) command; > - List<RemoveCommand> list = removeCommandMap.get( removeCommand > - .getKey() ); > - if ( list == null ) > - { > - list = new ArrayList<RemoveCommand>(); > - removeCommandMap.put( removeCommand.getKey(), list ); > - } > - list.add( removeCommand ); > - } > - else > - { > - throw new RuntimeException( "Unknown command: " + command ); > - } > + list.add( luceneCommand ); > } > > @Override > @@ -192,34 +170,32 @@ > luceneDs.getWriteLock(); > try > { > - for ( String key : removeCommandMap.keySet() ) > + for ( Map.Entry<String, List<LuceneCommand>> entry : > + this.commandMap.entrySet() ) > { > - IndexSearcher searcher = luceneDs.getIndexSearcher( key ); > - if ( searcher != null ) // meaning such a index exist > + String key = entry.getKey(); > + IndexWriter writer = luceneDs.getIndexWriter( key ); > + for ( LuceneCommand command : entry.getValue() ) > { > - List<RemoveCommand> commands = 
removeCommandMap.get( key > ); > - for ( RemoveCommand cmd : commands ) > + if ( command instanceof AddCommand ) > { > - String value = cmd.getValue(); > - long id = cmd.getNodeId(); > - luceneDs.deleteDocumentUsingReader( searcher, id, > - value ); > - luceneDs.invalidateCache( key, value ); > + indexWriter( writer, command.getNodeId(), key, > + command.getValue() ); > } > + else if ( command instanceof RemoveCommand ) > + { > + luceneDs.deleteDocumentsUsingWriter( > + writer, command.getNodeId(), command.getValue() > ); > + } > + else > + { > + throw new RuntimeException( "Unknown command type " + > + command + ", " + command.getClass() ); > + } > + luceneDs.invalidateCache( key, command.getValue() ); > } > - luceneDs.removeIndexSearcher( key ); > - } > - for ( String key : addCommandMap.keySet() ) > - { > - IndexWriter writer = luceneDs.getIndexWriter( key ); > - List<AddCommand> commands = addCommandMap.get( key ); > - for ( AddCommand cmd : commands ) > - { > - indexWriter( writer, cmd.getNodeId(), key, > cmd.getValue() ); > - luceneDs.invalidateCache( key, cmd.getValue() ); > - } > luceneDs.removeWriter( key, writer ); > - luceneDs.removeIndexSearcher( key ); > + luceneDs.invalidateIndexSearcher( key ); > } > } > finally > @@ -227,7 +203,7 @@ > luceneDs.releaseWriteLock(); > } > } > - > + > @Override > protected void doPrepare() > { > @@ -260,8 +236,7 @@ > protected void doRollback() > { > // TODO Auto-generated method stub > - addCommandMap.clear(); > - removeCommandMap.clear(); > + commandMap.clear(); > txIndexed.clear(); > txRemoved.clear(); > } > > _______________________________________________ > Commits mailing list > comm...@lists.neo4j.org > https://lists.neo4j.org/mailman/listinfo/commits >
-- Mattias Persson, [matt...@neotechnology.com] Neo Technology, www.neotechnology.com _______________________________________________ Neo mailing list User@lists.neo4j.org https://lists.neo4j.org/mailman/listinfo/user