Can we move this CHANGES entry under the beta section? On Fri, Jun 29, 2012 at 8:52 AM, <[email protected]> wrote: > Author: jpountz > Date: Fri Jun 29 12:52:54 2012 > New Revision: 1355346 > > URL: http://svn.apache.org/viewvc?rev=1355346&view=rev > Log: > LUCENE-4171: Performance improvements to Packed64 (Toke Eskildsen). > > Modified: > lucene/dev/trunk/lucene/CHANGES.txt > > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java > > Modified: lucene/dev/trunk/lucene/CHANGES.txt > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1355346&r1=1355345&r2=1355346&view=diff > ============================================================================== > --- lucene/dev/trunk/lucene/CHANGES.txt (original) > +++ lucene/dev/trunk/lucene/CHANGES.txt Fri Jun 29 12:52:54 2012 > @@ -1036,6 +1036,9 @@ Optimizations > the cloned instances. WeakIdentityMap was extended to support > iterating over its keys. (Uwe Schindler) > > +* LUCENE-4171: Performance improvements to Packed64. > + (Toke Eskildsen via Adrien Grand) > + > Bug fixes > > * LUCENE-2803: The FieldCache can miss values if an entry for a reader > > Modified: > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java?rev=1355346&r1=1355345&r2=1355346&view=diff > ============================================================================== > --- > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java > (original) > +++ > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java > Fri Jun 29 12:52:54 2012 > @@ -25,99 +25,40 @@ import java.util.Arrays; > > /** > * Space optimized random access capable array of values with a fixed number > of > - * bits. For 32 bits/value and less, performance on 32 bit machines is not > - * optimal. 
Consider using {@link Packed32} for such a setup. > + * bits/value. Values are packed contiguously. > * </p><p> > - * The implementation strives to avoid conditionals and expensive operations, > - * sacrificing code clarity to achieve better performance. > + * The implementation strives to perform as fast as possible under the > + * constraint of contiguous bits, by avoiding expensive operations. This > comes > + * at the cost of code clarity. > + * </p><p> > + * Technical details: This implementation is a refinement of a non-branching > + * version. The non-branching get and set methods meant that 2 or 4 atomics > in > + * the underlying array were always accessed, even for the cases where only > + * 1 or 2 were needed. Even with caching, this had a detrimental effect on > + * performance. > + * Related to this issue, the old implementation used lookup tables for > shifts > + * and masks, which also proved to be a bit slower than calculating the > shifts > + * and masks on the fly. > + * See https://issues.apache.org/jira/browse/LUCENE-4062 for details. > + * > */ > - > class Packed64 extends PackedInts.MutableImpl { > static final int BLOCK_SIZE = 64; // 32 = int, 64 = long > static final int BLOCK_BITS = 6; // The #bits representing BLOCK_SIZE > static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE > > - private static final int ENTRY_SIZE = BLOCK_SIZE + 1; > - static final int FAC_BITPOS = 3; > - > - /* > - * In order to make an efficient value-getter, conditionals should be > - * avoided. A value can be positioned inside of a block, requiring shifting > - * left or right or it can span two blocks, requiring a left-shift on the > - * first block and a right-shift on the right block. > - * </p><p> > - * By always shifting the first block both left and right, we get exactly > - * the right bits. By always shifting the second block right and applying > - * a mask, we get the right bits there. After that, we | the two bitsets. 
> - */ > - static final int[][] SHIFTS = > - new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS]; > - static final long[][] MASKS = new long[ENTRY_SIZE][ENTRY_SIZE]; > - > - static { // Generate shifts > - for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) { > - for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) { > - int[] currentShifts = SHIFTS[elementBits]; > - int base = bitPos * FAC_BITPOS; > - currentShifts[base ] = bitPos; > - currentShifts[base + 1] = BLOCK_SIZE - elementBits; > - if (bitPos <= BLOCK_SIZE - elementBits) { // Single block > - currentShifts[base + 2] = 0; > - MASKS[elementBits][bitPos] = 0; > - } else { // Two blocks > - int rBits = elementBits - (BLOCK_SIZE - bitPos); > - currentShifts[base + 2] = BLOCK_SIZE - rBits; > - MASKS[elementBits][bitPos] = ~(~0L << rBits); > - } > - } > - } > - } > - > - /* > - * The setter requires more masking than the getter. > - */ > - private static final long[][] WRITE_MASKS = > - new long[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS]; > - static { > - for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) { > - long elementPosMask = ~(~0L << elementBits); > - int[] currentShifts = SHIFTS[elementBits]; > - long[] currentMasks = WRITE_MASKS[elementBits]; > - for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) { > - int base = bitPos * FAC_BITPOS; > - currentMasks[base ] =~((elementPosMask > - << currentShifts[base + 1]) > - >>> currentShifts[base]); > - if (bitPos <= BLOCK_SIZE - elementBits) { // Second block not > used > - currentMasks[base+1] = ~0; // Keep all bits > - currentMasks[base+2] = 0; // Or with 0 > - } else { > - currentMasks[base+1] = ~(elementPosMask > - << currentShifts[base + 2]); > - currentMasks[base+2] = currentShifts[base + 2] == 0 ? 
0 : ~0; > - } > - } > - } > - } > - > - private static int pgcd(int a, int b) { > - if (a < b) { > - return pgcd(b, a); > - } else if (b == 0) { > - return a; > - } else { > - return pgcd(b, a % b); > - } > - } > - > - /* The bits */ > + /** > + * Values are stored contiguously in the blocks array. > + */ > private final long[] blocks; > - > - // Cached calculations > - private int maxPos; // blocks.length * BLOCK_SIZE / elementBits - 1 > - private int[] shifts; // The shifts for the current elementBits > - private long[] readMasks; > - private long[] writeMasks; > + /** > + * A right-aligned mask of width BitsPerValue used by {@link #get(int)}. > + */ > + private final long maskRight; > + /** > + * Optimization: Saves one lookup in {@link #get(int)}. > + */ > + private final int bpvMinusBlockSize; > > /** > * Creates an array with the internal structures adjusted for the given > @@ -126,18 +67,18 @@ class Packed64 extends PackedInts.Mutabl > * @param bitsPerValue the number of bits available for any given value. > */ > public Packed64(int valueCount, int bitsPerValue) { > - // TODO: Test for edge-cases (2^31 values, 63 bitsPerValue) > - // +2 due to the avoid-conditionals-trick. The last entry is always 0 > - this(new long[(int)((long)valueCount * bitsPerValue / BLOCK_SIZE + 2)], > + // NOTE: block-size was previously calculated as > + // valueCount * bitsPerValue / BLOCK_SIZE + 1 > + // due to memory layout requirements dictated by non-branching code > + this(new long[size(valueCount, bitsPerValue)], > valueCount, bitsPerValue); > } > > - > /** > * Creates an array backed by the given blocks. > * </p><p> > * Note: The blocks are used directly, so changes to the given block will > - * affect the Packed32-structure. > + * affect the Packed64-structure. > * @param blocks used as the internal backing array. Note that the last > * element cannot be addressed directly. > * @param valueCount the number of values. 
> @@ -146,7 +87,8 @@ class Packed64 extends PackedInts.Mutabl > public Packed64(long[] blocks, int valueCount, int bitsPerValue) { > super(valueCount, bitsPerValue); > this.blocks = blocks; > - updateCached(); > + maskRight = ~0L << (BLOCK_SIZE-bitsPerValue) >>> > (BLOCK_SIZE-bitsPerValue); > + bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE; > } > > /** > @@ -161,12 +103,12 @@ class Packed64 extends PackedInts.Mutabl > throws > IOException { > super(valueCount, bitsPerValue); > int size = size(valueCount, bitsPerValue); > - blocks = new long[size+1]; // +1 due to non-conditional tricks > - // TODO: find a faster way to bulk-read longs... > + blocks = new long[size]; // Previously +1 due to non-conditional tricks > for(int i=0;i<size;i++) { > blocks[i] = in.readLong(); > } > - updateCached(); > + maskRight = ~0L << (BLOCK_SIZE-bitsPerValue) >>> > (BLOCK_SIZE-bitsPerValue); > + bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE; > } > > private static int size(int valueCount, int bitsPerValue) { > @@ -174,48 +116,57 @@ class Packed64 extends PackedInts.Mutabl > return (int)(totBitCount/64 + ((totBitCount % 64 == 0 ) ? 0:1)); > } > > - private void updateCached() { > - readMasks = MASKS[bitsPerValue]; > - shifts = SHIFTS[bitsPerValue]; > - writeMasks = WRITE_MASKS[bitsPerValue]; > - maxPos = (int)((((long)blocks.length) * BLOCK_SIZE / bitsPerValue) - 2); > - } > - > /** > * @param index the position of the value. > * @return the value at the given index. 
> */ > + @Override > public long get(final int index) { > - assert index >= 0 && index < size(); > + // The abstract index in a bit stream > final long majorBitPos = (long)index * bitsPerValue; > - final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE > - final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE); > - > - final int base = bitPos * FAC_BITPOS; > - assert elementPos < blocks.length : "elementPos: " + elementPos + "; > blocks.len: " + blocks.length; > - return ((blocks[elementPos] << shifts[base]) >>> shifts[base+1]) | > - ((blocks[elementPos+1] >>> shifts[base+2]) & readMasks[bitPos]); > + // The index in the backing long-array > + final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); > + // The number of value-bits in the second long > + final long endBits = (majorBitPos & MOD_MASK) + bpvMinusBlockSize; > + > + if (endBits <= 0) { // Single block > + return (blocks[elementPos] >>> -endBits) & maskRight; > + } > + // Two blocks > + return ((blocks[elementPos] << endBits) > + | (blocks[elementPos+1] >>> (BLOCK_SIZE - endBits))) > + & maskRight; > } > > + @Override > public void set(final int index, final long value) { > + // The abstract index in a contiguous bit stream > final long majorBitPos = (long)index * bitsPerValue; > + // The index in the backing long-array > final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE > - final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE); > - final int base = bitPos * FAC_BITPOS; > + // The number of value-bits in the second long > + final long endBits = (majorBitPos & MOD_MASK) + bpvMinusBlockSize; > > - blocks[elementPos ] = (blocks[elementPos ] & writeMasks[base]) > - | (value << shifts[base + 1] >>> shifts[base]); > - blocks[elementPos+1] = (blocks[elementPos+1] & writeMasks[base+1]) > - | ((value << shifts[base + 2]) & > writeMasks[base+2]); > + if (endBits <= 0) { // Single block > + blocks[elementPos] = blocks[elementPos] & ~(maskRight << 
-endBits) > + | (value << -endBits); > + return; > + } > + // Two blocks > + blocks[elementPos] = blocks[elementPos] & ~(maskRight >>> endBits) > + | (value >>> endBits); > + blocks[elementPos+1] = blocks[elementPos+1] & (~0L >>> endBits) > + | (value << (BLOCK_SIZE - endBits)); > } > > + > @Override > public String toString() { > return "Packed64(bitsPerValue=" + bitsPerValue + ", size=" > - + size() + ", maxPos=" + maxPos > - + ", elements.length=" + blocks.length + ")"; > + + size() + ", elements.length=" + blocks.length + ")"; > } > > + @Override > public long ramBytesUsed() { > return RamUsageEstimator.sizeOf(blocks); > } > @@ -226,7 +177,7 @@ class Packed64 extends PackedInts.Mutabl > assert fromIndex <= toIndex; > > // minimum number of values that use an exact number of full blocks > - final int nAlignedValues = 64 / pgcd(64, bitsPerValue); > + final int nAlignedValues = 64 / gcd(64, bitsPerValue); > final int span = toIndex - fromIndex; > if (span <= 3 * nAlignedValues) { > // there needs be at least 2 * nAlignedValues aligned values for the > @@ -270,6 +221,17 @@ class Packed64 extends PackedInts.Mutabl > } > } > > + private static int gcd(int a, int b) { > + if (a < b) { > + return gcd(b, a); > + } else if (b == 0) { > + return a; > + } else { > + return gcd(b, a % b); > + } > + } > + > + @Override > public void clear() { > Arrays.fill(blocks, 0L); > } > >
-- lucidimagination.com --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
