goller 2004/10/06 03:40:23 Modified: src/java/org/apache/lucene/index IndexReader.java TermVectorsReader.java SegmentReader.java Added: src/test/org/apache/lucene/search TestMultiThreadTermVectors.java Log: Remove synchronization from TermVectors (Patch #30736) Revision Changes Path 1.39 +2 -2 jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java Index: IndexReader.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java,v retrieving revision 1.38 retrieving revision 1.39 diff -u -r1.38 -r1.39 --- IndexReader.java 6 Oct 2004 09:05:56 -0000 1.38 +++ IndexReader.java 6 Oct 2004 10:40:23 -0000 1.39 @@ -532,7 +532,7 @@ protected abstract void doClose() throws IOException; /** Release the write lock, if needed. */ - protected final void finalize() { + protected void finalize() { if (writeLock != null) { writeLock.release(); // release write lock writeLock = null; 1.6 +22 -11 jakarta-lucene/src/java/org/apache/lucene/index/TermVectorsReader.java Index: TermVectorsReader.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/TermVectorsReader.java,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- TermVectorsReader.java 5 Oct 2004 17:30:47 -0000 1.5 +++ TermVectorsReader.java 6 Oct 2004 10:40:23 -0000 1.6 @@ -22,11 +22,9 @@ import java.io.IOException; /** - * FIXME: relax synchro! - * * @version $Id$ */ -class TermVectorsReader { +class TermVectorsReader implements Cloneable { private FieldInfos fieldInfos; private IndexInput tvx; @@ -86,9 +84,9 @@ * @param docNum The document number to retrieve the vector for * @param field The field within the document to retrieve * @return The TermFreqVector for the document and field or null if there is no termVector for this field. 
- * @throws IOException + * @throws IOException if there is an error reading the term vector files */ - synchronized TermFreqVector get(int docNum, String field) throws IOException { + TermFreqVector get(int docNum, String field) throws IOException { // Check if no term vectors are available for this segment at all int fieldNumber = fieldInfos.fieldNumber(field); TermFreqVector result = null; @@ -137,13 +135,14 @@ return result; } - /** - * Return all term vectors stored for this document or null if there are no term vectors - * for the document. - * @throws IOException + * Return all term vectors stored for this document or null if the could not be read in. + * + * @param docNum The document number to retrieve the vector for + * @return All term frequency vectors + * @throws IOException if there is an error reading the term vector files */ - synchronized TermFreqVector[] get(int docNum) throws IOException { + TermFreqVector[] get(int docNum) throws IOException { TermFreqVector[] result = null; // Check if no term vectors are available for this segment at all if (tvx != null) { @@ -295,4 +294,16 @@ return tv; } + protected Object clone() { + TermVectorsReader clone = null; + try { + clone = (TermVectorsReader) super.clone(); + } catch (CloneNotSupportedException e) {} + + clone.tvx = (IndexInput) tvx.clone(); + clone.tvd = (IndexInput) tvd.clone(); + clone.tvf = (IndexInput) tvf.clone(); + + return clone; + } } 1.32 +31 -9 jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java Index: SegmentReader.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java,v retrieving revision 1.31 retrieving revision 1.32 diff -u -r1.31 -r1.32 --- SegmentReader.java 6 Oct 2004 09:05:56 -0000 1.31 +++ SegmentReader.java 6 Oct 2004 10:40:23 -0000 1.32 @@ -43,7 +43,8 @@ private FieldsReader fieldsReader; TermInfosReader tis; - TermVectorsReader termVectorsReader; + 
TermVectorsReader termVectorsReaderOrig = null; + ThreadLocal termVectorsLocal = new ThreadLocal(); BitVector deletedDocs = null; private boolean deletedDocsDirty = false; @@ -156,9 +157,15 @@ openNorms(cfsDir); if (fieldInfos.hasVectors()) { // open term vector files only as needed - termVectorsReader = new TermVectorsReader(cfsDir, segment, fieldInfos); + termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos); } } + + protected void finalize() { + // patch for pre-1.4.2 JVMs, whose ThreadLocals leak + termVectorsLocal.set(null); + super.finalize(); + } protected void doCommit() throws IOException { if (deletedDocsDirty) { // re-write deleted @@ -193,8 +200,8 @@ closeNorms(); - if (termVectorsReader != null) - termVectorsReader.close(); + if (termVectorsReaderOrig != null) + termVectorsReaderOrig.close(); if (cfsReader != null) cfsReader.close(); @@ -456,6 +463,19 @@ } } + /** + * Create a clone from the initial TermVectorsReader and store it in the ThreadLocal. + * @return TermVectorsReader + */ + private TermVectorsReader getTermVectorsReader() { + TermVectorsReader tvReader = (TermVectorsReader)termVectorsLocal.get(); + if (tvReader == null) { + tvReader = (TermVectorsReader)termVectorsReaderOrig.clone(); + termVectorsLocal.set(tvReader); + } + return tvReader; + } + /** Return a term frequency vector for the specified document and field. 
The * vector returned contains term numbers and frequencies for all terms in * the specified field of this document, if the field had storeTermVector @@ -465,9 +485,10 @@ public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException { // Check if this field is invalid or has no stored term vector FieldInfo fi = fieldInfos.fieldInfo(field); - if (fi == null || !fi.storeTermVector || termVectorsReader == null) + if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null) return null; - + + TermVectorsReader termVectorsReader = getTermVectorsReader(); return termVectorsReader.get(docNumber, field); } @@ -480,9 +501,10 @@ * @throws IOException */ public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException { - if (termVectorsReader == null) + if (termVectorsReaderOrig == null) return null; - + + TermVectorsReader termVectorsReader = getTermVectorsReader(); return termVectorsReader.get(docNumber); } } 1.1 jakarta-lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java Index: TestMultiThreadTermVectors.java =================================================================== package org.apache.lucene.search; /** * Copyright 2004 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/

import java.io.IOException;

import junit.framework.TestCase;

import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.English;

/**
 * Reads term vectors from one shared {@link IndexReader} on several threads
 * at once and checks that every thread gets the expected terms back.
 *
 * @author Bernhard Messer
 * @version $rcs = ' $Id: TestMultiThreadTermVectors.java,v 1.1 2004/10/06 10:40:23 goller Exp $ ' ;
 */
public class TestMultiThreadTermVectors extends TestCase {
  private RAMDirectory directory = new RAMDirectory();
  public int numDocs = 100;
  public int numThreads = 3;

  public TestMultiThreadTermVectors(String s) {
    super(s);
  }

  /**
   * Builds an in-memory index of {@link #numDocs} documents. Document
   * <code>i</code> has a single field named "field" holding
   * <code>English.intToEnglish(i)</code>, indexed un-tokenized with its term
   * vector stored.
   */
  public void setUp() throws Exception {
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true);
    //writer.setUseCompoundFile(false);
    //writer.infoStream = System.out;
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      Field fld = new Field("field", English.intToEnglish(i), Field.Store.YES,
          Field.Index.UN_TOKENIZED, Field.TermVector.YES);
      doc.add(fld);
      writer.addDocument(doc);
    }
    writer.close();
  }

  /**
   * Opens one reader on the index and runs the concurrent read check with
   * 1, 2, ... {@link #numThreads} threads sharing that reader.
   */
  public void test() {
    IndexReader reader = null;

    try {
      reader = IndexReader.open(directory);
      for (int i = 1; i <= numThreads; i++)
        testTermPositionVectors(reader, i);
    } catch (IOException ioe) {
      // getMessage() may be null; report the exception itself instead
      fail("unexpected IOException: " + ioe);
    } finally {
      if (reader != null) {
        try {
          /** close the opened reader */
          reader.close();
        } catch (IOException ioe) {
          // best effort: a failure to close must not hide the test result
          ioe.printStackTrace();
        }
      }
    }
  }

  /**
   * Starts <code>threadCount</code> worker threads that all read term vectors
   * through <code>reader</code>, waits for them to finish and fails the test
   * if any worker recorded a failure.
   *
   * @param reader the reader shared by all worker threads
   * @param threadCount number of worker threads to start
   */
  public void testTermPositionVectors(final IndexReader reader, int threadCount) {
    MultiThreadTermVectorsReader[] mtr = new MultiThreadTermVectorsReader[threadCount];
    for (int i = 0; i < threadCount; i++) {
      mtr[i] = new MultiThreadTermVectorsReader();
      mtr[i].init(reader);
    }

    /** wait until all threads have finished */
    for (int i = 0; i < mtr.length; i++) {
      try {
        mtr[i].join();
      } catch (InterruptedException ie) {
        // keep the interrupt visible to the caller and give up waiting
        Thread.currentThread().interrupt();
        fail("interrupted while waiting for term vector reader thread " + i);
      }
    }

    // Failures raised on a worker thread are invisible to JUnit unless they
    // are reported from the thread that runs the test.
    for (int i = 0; i < mtr.length; i++) {
      Throwable failure = mtr[i].getFailure();
      if (failure != null) {
        failure.printStackTrace();
        fail("term vector reader thread " + i + " of " + threadCount
            + " failed: " + failure);
      }
    }
  }

}

/**
 * Worker that repeatedly reads and verifies the term vectors of every
 * document through a shared {@link IndexReader} on its own thread.
 */
class MultiThreadTermVectorsReader implements Runnable {

  private IndexReader reader = null;
  private Thread t = null;

  private final int runsToDo = 100;
  /** Milliseconds spent inside the term vector calls; kept for ad-hoc timing. */
  long timeElapsed = 0;
  /** First error raised on the worker thread, or null if none occurred. */
  private volatile Throwable failure = null;


  /**
   * Resets the counters and starts a new thread that runs this worker.
   *
   * @param reader the reader to fetch term vectors from
   */
  public void init(IndexReader reader) {
    this.reader = reader;
    timeElapsed = 0;
    failure = null;
    t = new Thread(this);
    t.start();
  }

  /** Returns true while the worker thread is running. */
  public boolean isAlive() {
    if (t == null) return false;

    return t.isAlive();
  }

  /**
   * Blocks until the worker thread has terminated; returns immediately if
   * the worker was never started.
   *
   * @throws InterruptedException if the waiting thread is interrupted
   */
  void join() throws InterruptedException {
    if (t != null)
      t.join();
  }

  /** Returns the error that stopped the worker, or null if it ran cleanly. */
  Throwable getFailure() {
    return failure;
  }

  /** Runs the read-and-verify pass <code>runsToDo</code> times. */
  public void run() {
    try {
      // run the test 100 times
      for (int i = 0; i < runsToDo; i++)
        testTermVectors();
    } catch (Throwable e) {
      // Throwable, not Exception: JUnit assertion failures are Errors and
      // must be handed to the test thread as well.
      failure = e;
    }
  }

  /**
   * Fetches the term vectors of every document, once via
   * <code>getTermFreqVectors(docId)</code> and once via
   * <code>getTermFreqVector(docId, "field")</code>, and verifies both results.
   */
  private void testTermVectors() throws Exception {
    // check:
    int numDocs = reader.numDocs();
    long start = 0L;
    for (int docId = 0; docId < numDocs; docId++) {
      start = System.currentTimeMillis();
      TermFreqVector[] vectors = reader.getTermFreqVectors(docId);
      timeElapsed += System.currentTimeMillis() - start;

      // verify vectors result
      verifyVectors(vectors, docId);

      start = System.currentTimeMillis();
      TermFreqVector vector = reader.getTermFreqVector(docId, "field");
      timeElapsed += System.currentTimeMillis() - start;

      vectors = new TermFreqVector[1];
      vectors[0] = vector;

      verifyVectors(vectors, docId);
    }
  }

  /**
   * Checks that the terms of all given vectors, concatenated in order, equal
   * <code>English.intToEnglish(num)</code> (both sides trimmed). A mismatch
   * or a missing vector raises a JUnit assertion failure instead of only
   * being printed.
   *
   * @param vectors the term vectors returned for the document
   * @param num the document number, which determines the expected text
   */
  private void verifyVectors(TermFreqVector[] vectors, int num) {
    TestCase.assertNotNull("no term vectors returned for doc " + num, vectors);

    StringBuffer temp = new StringBuffer();
    String[] terms = null;
    for (int i = 0; i < vectors.length; i++) {
      TestCase.assertNotNull("term vector " + i + " is null for doc " + num, vectors[i]);
      terms = vectors[i].getTerms();
      for (int z = 0; z < terms.length; z++) {
        temp.append(terms[z]);
      }
    }

    TestCase.assertEquals("wrong term result for doc " + num,
        English.intToEnglish(num).trim(), temp.toString().trim());
  }
}
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]