I finally got around to making the caching NioFSDirectory compliant with
Lucene 1.9. I also produced a performance test case.

Below are the results on my machine:

read random = 586391
read same = 68578
nio read random = 72766
nio max mem = 203292672
nio memory = 102453248
nio hits = 14974713
nio misses = 25039
nio hit rate = 99
nio read same = 22344

The most important statistic is that reading via the local cache, vs.
going to the OS (where the block is cached), is 3x faster (22344 vs. 68578).
With random reads, when the block may not be in the OS cache, it is 8x
faster (72766 vs. 586391).

Attached are all of the files needed to run the test (The NioFSDirectory is
not needed for the test).

The revised NioFile shares one cache for all Nio files. The revised
MemoryCache uses SoftReferences to allow the cache to grow unbounded, and
lets the GC handle cache reductions (it seems that in most JVMs,
SoftReferences are reclaimed in LRU order, which helps).

This test only demonstrates improvements in the low-level IO layer, but one
could infer significant performance improvements for common searches and/or
document retrievals.

Is there a standard Lucene search performance test I could run both with and
without the NioFSDirectory to demonstrate real-world performance
improvements? I have some internal tests that I am collating, but I would
rather use a standard test if possible.
package org.apache.lucene.util;

import java.io.*;
import java.util.*;

import junit.framework.TestCase;

/**
 * Micro-benchmark comparing block reads through {@link RandomAccessFile} with block reads
 * through {@link NioFile} and its shared block cache.
 *
 * Every test prints its elapsed wall-clock time in milliseconds. The read tests operate on the
 * file written by {@link #testCreateFile()}, so that test has to run before the others.
 */
public class NioFilePerformanceTest extends TestCase {
    static final int BLOCKSIZE = 1 * 2048 + 1; // try with 2k, 4k and 2k+1 (so not on nio boundary)
    static final int NBLOCKS = 1000 * 100; // 400 mb file with 4k blocks
    static final File file = new File("testfile");
    static final int NREADS = (NBLOCKS)*100;
    static final int PERCENTOFFILE = 50; // must be 1-100
    static final byte[] block = new byte[BLOCKSIZE];
    
    static {
        System.setProperty("org.apache.lucene.CachePercent","90");
    }
    
    /**
     * Writes NBLOCKS blocks of BLOCKSIZE bytes; block i is filled with the byte value (byte) i.
     */
    public void testCreateFile() throws Exception {
        long stime = System.currentTimeMillis();
        RandomAccessFile rf = new RandomAccessFile(file,"rw");
        try {
            for(int i=0;i<NBLOCKS;i++) {
                Arrays.fill(block,(byte)i);
                rf.seek(i*(long)BLOCKSIZE);
                rf.write(block);
            }
        } finally {
            // release the file handle even when a write fails
            rf.close();
        }
        System.out.println("create file = "+(System.currentTimeMillis()-stime));
    }

    /**
     * Reads NREADS randomly chosen blocks from the first PERCENTOFFILE percent of the file
     * using RandomAccessFile.
     */
    public void testReadRandom() throws Exception {
        long stime = System.currentTimeMillis();
        RandomAccessFile rf = new RandomAccessFile(file,"r");
        try {
            Random r = new Random();
            for(int i=0;i<NREADS;i++) {
                int blockno = r.nextInt((NBLOCKS*PERCENTOFFILE)/100);
                rf.seek(blockno*(long)BLOCKSIZE);
                rf.read(block);
            }
        } finally {
            rf.close();
        }
        System.out.println("read random = "+(System.currentTimeMillis()-stime));
    }

    /**
     * Reads one randomly chosen block NREADS times using RandomAccessFile.
     */
    public void testReadSame() throws Exception {
        long stime = System.currentTimeMillis();
        RandomAccessFile rf = new RandomAccessFile(file,"r");
        try {
            Random r = new Random();
            int blockno = r.nextInt((NBLOCKS*PERCENTOFFILE)/100);
            for(int i=0;i<NREADS;i++) {
                rf.seek(blockno*(long)BLOCKSIZE);
                rf.read(block);
            }
        } finally {
            rf.close();
        }
        System.out.println("read same = "+(System.currentTimeMillis()-stime));
    }

    /**
     * Same access pattern as {@link #testReadRandom()} but through NioFile; also prints the
     * cache statistics exposed by NioFile.
     */
    public void testReadRandomNio() throws Exception {
        long stime = System.currentTimeMillis();
        NioFile nf = new NioFile(file,"r");
        try {
            Random r = new Random();
            for(int i=0;i<NREADS;i++) {
                int blockno = r.nextInt((NBLOCKS*PERCENTOFFILE)/100);
                nf.read(block,0,BLOCKSIZE,blockno*(long)BLOCKSIZE);
            }
        } finally {
            nf.close();
        }
        System.out.println("nio read random = "+(System.currentTimeMillis()-stime));
        System.out.println("nio max mem = "+NioFile.cache.maxmem());
        System.out.println("nio memory = "+NioFile.cache.memused());
        System.out.println("nio hits = "+NioFile.cachehits);
        System.out.println("nio misses = "+NioFile.cachemisses);
        // widen to long first: hits*100 overflows int once hits exceeds roughly 21.4 million
        long hits = NioFile.cachehits;
        long lookups = hits + NioFile.cachemisses;
        long hitrate = lookups == 0 ? 0 : hits*100/lookups;
        System.out.println("nio hit rate = "+hitrate);
    }

    /**
     * Same access pattern as {@link #testReadSame()} but through NioFile.
     */
    public void testReadSameNio() throws Exception {
        long stime = System.currentTimeMillis();
        NioFile nf = new NioFile(file,"r");
        try {
            Random r = new Random();
            int blockno = r.nextInt((NBLOCKS*PERCENTOFFILE)/100);
            for(int i=0;i<NREADS;i++) {
                nf.read(block,0,BLOCKSIZE,blockno*(long)BLOCKSIZE);
            }
        } finally {
            nf.close();
        }
        System.out.println("nio read same = "+(System.currentTimeMillis()-stime));
    }
}
package org.apache.lucene.util;

import java.lang.ref.*;
import java.util.*;

/**
 * Memory cache that maps a key (e.g. a block number) to a data block. Data blocks are held
 * through soft references, so the garbage collector may discard them; the cache learns about
 * discarded blocks through a ReferenceQueue that is polled every INTERVAL operations.
 * The design relies on the JVM tending to free least-recently-used SoftReferences first
 * (not guaranteed by the language specification).
 *
 * All public methods are synchronized on the cache instance.
 */
public class MemoryCache {
    /**
     * number of operations before ReferenceQueue is polled
     */
    private static final int INTERVAL = 10000;

    /** key to CacheEntry mapping */
    private Map cache;

    /** sum of the data lengths of all entries currently in the map */
    private long cachesize;

    /** receives CacheEntry references whose data has been reclaimed */
    private ReferenceQueue refq = new ReferenceQueue();

    /** operations performed since the queue was last polled */
    private int interval = 0;

    /** Runtime.maxMemory() sampled when the cache was created; informational only */
    private long maxsize = Runtime.getRuntime().maxMemory();

    public MemoryCache() {
        cache = new HashMap();
    }

    /**
     * Returns the data cached under key, or null if the key is not present or its data has
     * been reclaimed.
     */
    public synchronized byte[] get(Object key) {
        pollQueue();

        CacheEntry ce = (CacheEntry) cache.get(key);
        return ce != null ? ce.getData() : null;
    }

    /**
     * Every INTERVAL+1 calls, drains the reference queue and drops the map entries whose data
     * has been reclaimed. Callers must hold the lock on this instance.
     */
    private void pollQueue() {
        if (++interval <= INTERVAL)
            return;
        interval = 0;

        CacheEntry ce;
        while ((ce = (CacheEntry) refq.poll()) != null) {
            // Only drop the mapping if it still points at the reclaimed entry. When the key
            // was re-put in the meantime, the map holds a newer, live entry and put() has
            // already subtracted the old entry's size.
            if (cache.get(ce.key) == ce) {
                cache.remove(ce.key);
                cachesize -= ce.size;
            }
        }
    }

    /**
     * Stores data under key, replacing any previous entry for that key.
     *
     * @throws IllegalArgumentException if key or data is null
     */
    public synchronized void put(Object key, byte[] data) {
        if(key==null)
            throw new IllegalArgumentException("key cannot be null");
        if(data==null)
            throw new IllegalArgumentException("data block "+key+" is null");

        CacheEntry ce = new CacheEntry(key, data, refq);
        cachesize += ce.size;
        CacheEntry old = (CacheEntry) cache.put(key, ce);
        if (old != null)
            cachesize -= old.size;

        pollQueue();
    }

    /** Removes all entries and resets the accounted size to zero. */
    public synchronized void clear() {
        cache.clear();
        cachesize = 0;
    }

    /**
     * Returns the number of entries in the map. This can include entries whose data has been
     * reclaimed but which have not been purged yet.
     */
    public synchronized int size() {
        return cache.size();
    }

    /** Returns the JVM's maximum memory as sampled at construction time. */
    public long maxmem() {
        return maxsize;
    }

    /** Returns the accounted size, in bytes, of the entries currently in the map. */
    public synchronized long memused() {
        return cachesize;
    }

    /**
     * Returns true if the map has an entry for key; the entry's data may already have been
     * reclaimed.
     */
    public synchronized boolean containsKey(Object key) {
        return cache.containsKey(key);
    }

    /** Soft reference to a data block that remembers its key and the block's length. */
    private final static class CacheEntry extends SoftReference {
        int size;
        Object key;

        CacheEntry(Object key, byte[] data, ReferenceQueue queue) {
            super(data, queue);
            size = data.length;
            this.key = key;
        }
        byte[] getData() {
            return (byte[]) get();
        }
    }
}
package org.apache.lucene.util;

import java.io.*;
import java.lang.ref.SoftReference;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.*;

/**
 * wrapper for NIO FileChannel in order to circumvent problems with multiple threads reading the
 * same FileChannel, and to provide local cache. The current Windows implementation of FileChannel
 * has some synchronization even when performing positioned reads. See JDK bug #6265734.
 * 
 * The NioFile contains internal caching to reduce the number of physical disk reads. All
 * NioFile instances share one static MemoryCache; blocks are keyed by (NioFile instance, block
 * number). The block size is taken from the system property org.apache.lucene.BlockSize
 * (default 4096).
 */
public final class NioFile {
    static final private int BLOCKSIZE = Integer.getInteger("org.apache.lucene.BlockSize",4096).intValue();
    
    /** number of block lookups served from the cache (updated without synchronization) */
    static public int cachehits = 0; 
    /** number of block lookups that had to read from the channel (updated without synchronization) */
    static public int cachemisses = 0; 
    
    static public MemoryCache cache;
    static {
        cache = new MemoryCache();
    }

    /** number of open() calls not yet matched by close(); the channel is open while this is > 0 */
    private int opencount = 0;
    private FileChannel channel;
    
    /**
     * Opens path with the given RandomAccessFile mode.
     */
    public NioFile(File path,String mode) throws IOException {
        open(path,mode);
    }
    
    /**
     * Registers one more user of this file, opening the channel for the first user.
     */
    private synchronized void open(File path,String mode) throws IOException {
        if(opencount==0) {
            RandomAccessFile raf = new RandomAccessFile(path,mode);
            channel = raf.getChannel();
        }
        // counted only after a successful open, so a failed open leaves the state unchanged
        opencount++;
    }

    /**
     * Releases one user of this file and closes the channel when the last user is gone.
     * Calls made when the file is already fully closed are ignored.
     */
    public synchronized void close() throws IOException {
        if(opencount<=0)
            return; // a surplus close must not drive the count negative, or a later open() would skip reopening
        if(--opencount==0)
            channel.close();
    }

    /** Returns true while at least one user has the file open. */
    public synchronized boolean isOpen() {
        return opencount>0;
    }

    /**
     * Copies len bytes starting at file offset position into b[offset..], going through the
     * block cache. Bytes beyond the end of the file are delivered as zeros.
     *
     * @param b        destination array
     * @param offset   first index in b to write
     * @param len      number of bytes to copy
     * @param position absolute file offset of the first byte
     */
    public void read(byte[] b, int offset, int len, long position) throws IOException {
        do {
            long blockno = (position/BLOCKSIZE);
            BlockKey bk = new BlockKey(this,blockno);
            byte[] block = cache.get(bk);
            
            if(block==null) {
                cachemisses++;
                block = readBlock(blockno);
                cache.put(bk,block);
            } else
                cachehits++;
            
            // copy the part of the request that falls inside this block
            int blockoffset = (int) (position % BLOCKSIZE);
            int i = Math.min(len,BLOCKSIZE-blockoffset);
            
            System.arraycopy(block,blockoffset,b,offset,i);
            
            offset += i;
            len -= i;
            position += i;
            
        } while (len >0);
    }

    /**
     * Reads block blockno from the channel. A single positioned read may return fewer bytes
     * than requested, so reading continues until the block is full or end of file is reached;
     * the unread tail of the last block stays zero.
     */
    private byte[] readBlock(long blockno) throws IOException {
        byte[] block = new byte[BLOCKSIZE];
        ByteBuffer buf = ByteBuffer.wrap(block);
        long start = blockno*BLOCKSIZE;
        while(buf.hasRemaining()) {
            if(channel.read(buf,start+buf.position())<0)
                break; // end of file
        }
        return block;
    }
    
    /** Cache key identifying one block of one NioFile instance. */
    static final class BlockKey {
        private final NioFile file;
        private final long blockno;
        private final int hashCode;

        public BlockKey(NioFile file, long blockno) {
            this.file = file;
            this.blockno = blockno;
            hashCode = (int) (file.hashCode() ^ blockno);
        }
        public int hashCode() {
            return hashCode;
        }
        public boolean equals(Object o){
            if(!(o instanceof BlockKey))
                return false;
            BlockKey bk0 = (BlockKey) o;
            // since the same file name can be reused (e.g. segments, etc.) and the cache entries are not cleared
            // when a file is closed, files are compared by NioFile instance, not by file name
            return file==bk0.file && blockno==bk0.blockno;
        }
    }
    
    /** File to SoftReference(NioFile) mapping for the shared ".cfs" files */
    private static Map files = new HashMap();

    /**
     * Returns a NioFile for path. Files whose name ends in ".cfs" are shared: an instance that
     * is still softly reachable is reused (and its open count incremented). Every other file
     * gets a fresh instance on each call.
     */
    public static synchronized NioFile getFile(File path, String mode) throws IOException {
        String filename = path.getName();
        // since blocks are cached, a new references must be returned for certain files where the filename is
        // reused
        if(!filename.endsWith(".cfs")) {
            return new NioFile(path,mode);
        }
        SoftReference sr = (SoftReference) files.get(path);
        NioFile file = (NioFile) (sr!=null ? sr.get() : null);
        if(file!=null) {
            file.open(path,mode);
        } else {
            file = new NioFile(path,mode);
            files.put(path,new SoftReference(file));
        }
        return file;
    }
}
package org.apache.lucene.store;

import java.io.*;

import org.apache.lucene.util.NioFile;

/**
 * FSDirectory variant whose inputs read through NioFile, i.e. via 'nio' and an internal
 * block cache, to improve performance.
 */
public class NioFSDirectory extends FSDirectory {
    public NioFSDirectory(){
    }

    /** Opens the named file for reading through the NioFile block cache. */
    public InputStream openFile(String name) throws IOException {
        return newNioInput(name);
    }

    /** Opens the named file for reading through the NioFile block cache. */
    public IndexInput openInput(String name) throws IOException {
        return newNioInput(name);
    }

    /**
     * Creates the stream for a file name resolved against getFile() (presumably the index
     * directory; defined in FSDirectory).
     */
    private NioInputStream newNioInput(String name) throws IOException {
        File target = new File(getFile(), name);
        return new NioInputStream(target);
    }
}

/**
 * Input stream that reads through a (possibly shared) NioFile. Clones share the original's
 * NioFile and never close it.
 */
final class NioInputStream extends InputStream {
    NioFile file;

    /** true for instances produced by clone(); clones do not own the NioFile reference */
    boolean isClone;

    /** set once this stream has released its NioFile reference */
    private boolean closed;

    /**
     * Obtains the NioFile for path in read mode and records the file's current length.
     */
    public NioInputStream(File path) throws IOException {
        file = NioFile.getFile(path,"r");
        length = path.length();
    }

    /** InputStream methods */
    protected final void readInternal(byte[] b, int offset, int len) throws IOException {
        file.read(b,offset,len,getFilePointer());
    }

    /**
     * Releases this stream's reference on the NioFile. Only the first call on a non-clone has
     * an effect: NioFile counts its users, so releasing twice (e.g. an explicit close followed
     * by finalize) would drop a reference that belongs to another stream on the same file.
     */
    public final void close() throws IOException {
        if (isClone || closed)
            return;
        closed = true;
        file.close();
    }

    /** Nothing to do: every read passes the absolute file position to NioFile. */
    protected final void seekInternal(long position) throws IOException {
    }

    /** Safety net that releases the NioFile if the stream was never closed. */
    protected final void finalize() throws IOException {
        close();
    }

    public Object clone() {
        NioInputStream clone = (NioInputStream)super.clone();
        clone.isClone = true;
        return clone;
    }

    /** Returns true while the underlying NioFile has at least one user. */
    boolean isFDValid() throws IOException {
        return file.isOpen();
    }
}

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to