gf2121 opened a new issue, #12721:
URL: https://github.com/apache/lucene/issues/12721
### Description
An immature idea ! :)
I noticed that `BPIndexReorderer$ComputeGainsTask#computeGain()` took a lot
in CPU profile:
```
PERCENT CPU SAMPLES STACK
4.75% 53042
org.apache.lucene.misc.index.BPIndexReorderer$ComputeGainsTask#computeGain()
4.47% 49905
org.apache.lucene.analysis.standard.StandardTokenizerImpl#getNextToken()
3.87% 43194
org.apache.lucene.index.FreqProxTermsWriter$SortingPostingsEnum#reset()
3.67% 40975
org.apache.lucene.index.TermsHashPerField#writeByte()
3.59% 40003 org.apache.lucene.util.BytesRefHash#equals()
3.55% 39588
org.apache.lucene.store.MemorySegmentIndexInput#readByte()
2.80% 31197
org.apache.lucene.util.ByteBlockPool#setBytesRef()
2.52% 28154 java.io.BufferedOutputStream#write()
2.33% 25978 org.apache.lucene.util.BytesRefHash#findHash()
2.32% 25896
org.apache.lucene.codecs.lucene90.Lucene90PostingsWriter#startDoc()
2.24% 24966 org.apache.lucene.util.Sorter#binarySort()
2.23% 24864 org.apache.lucene.store.DataInput#readVInt()
2.10% 23484
org.apache.lucene.misc.index.BPIndexReorderer$ForwardIndex#seek()
2.04% 22808
org.apache.lucene.codecs.PushPostingsWriterBase#writeTerm()
```
I tried to implement this with vector api, the result looks good on AVX-512
(Java 21):
```
Benchmark (maxTerm) (termsNum) Mode Cnt Score Error
Units
GainBenchmark.gainNew 1024 128 thrpt 5 3.471 ± 0.142
ops/us
GainBenchmark.gainOld 1024 128 thrpt 5 2.031 ± 0.432
ops/us
```
<details><summary> Code </summary>
```
package testing;
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.Vector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;
import org.openjdk.jmh.annotations.*;
import java.util.Arrays;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@State(Scope.Benchmark)
@Warmup(iterations = 3, time = 3)
@Measurement(iterations = 5, time = 3)
@Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public class GainBenchmark {
private static final VectorSpecies<Integer> INT_SPECIES =
IntVector.SPECIES_PREFERRED;
private static final VectorSpecies<Float> FLOAT_SPECIES =
FloatVector.SPECIES_PREFERRED;
private int[] terms;
private int[] fromFreqs;
private int[] toFreqs;
private final int[] indicesBuffer = new int[128];
@Param({"128"})
int termsNum;
@Param({"1024"})
int maxTerm;
private static final float[] LOG2_TABLE = new float[256];
static {
LOG2_TABLE[0] = 1f;
// float that has the biased exponent of 1f and zeros for sign and
mantissa bits
final int one = Float.floatToIntBits(1f);
for (int i = 0; i < 256; ++i) {
float f = Float.intBitsToFloat(one | (i << (23 - 8)));
LOG2_TABLE[i] = (float) (Math.log(f) / Math.log(2));
}
}
@Setup(Level.Trial)
public void init() {
terms = new int[termsNum];
for (int i=0; i < termsNum; i++) {
terms[i] = ThreadLocalRandom.current().nextInt(maxTerm);
}
fromFreqs = new int[maxTerm];
toFreqs = new int[maxTerm];
for (int i = 0; i < maxTerm; i++) {
fromFreqs[i] = ThreadLocalRandom.current().nextInt(16);
toFreqs[i] = ThreadLocalRandom.current().nextInt(16);
}
float o = gainOld();
float n = gainNew();
if (Math.abs(o - n) > 1E-4f) {
throw new RuntimeException("New is wrong, old: " + o + ", new: " + n);
}
}
@Benchmark
public float gainOld() {
float gain = 0;
for (int i = 0; i < terms.length; ++i) {
final int termID = terms[i];
final int fromDocFreq = fromFreqs[termID];
final int toDocFreq = toFreqs[termID];
assert fromDocFreq >= 0;
assert toDocFreq >= 0;
gain +=
(toDocFreq == 0 ? 0 : fastLog2(toDocFreq))
- (fromDocFreq == 0 ? 0 : fastLog2(fromDocFreq));
}
return gain;
}
@Benchmark
public float gainNew() {
final int upperBound = INT_SPECIES.loopBound(termsNum);
int i = 0;
FloatVector acc = FloatVector.zero(FLOAT_SPECIES);
for (; i < upperBound; i += INT_SPECIES.length()) {
IntVector fromVector = IntVector.fromArray(INT_SPECIES, fromFreqs, 0,
terms, i);
IntVector toVector = IntVector.fromArray(INT_SPECIES, toFreqs, 0,
terms, i);
Vector<Float> gainVector =
fastLog2(toVector).sub(fastLog2(fromVector));
acc = acc.add(gainVector);
}
float gain = acc.reduceLanes(VectorOperators.ADD);
for ( ; i < terms.length; ++i) {
final int termID = terms[i];
final int fromDocFreq = fromFreqs[termID];
final int toDocFreq = toFreqs[termID];
assert fromDocFreq >= 0;
assert toDocFreq >= 0;
gain +=
(toDocFreq == 0 ? 0 : fastLog2(toDocFreq))
- (fromDocFreq == 0 ? 0 : fastLog2(fromDocFreq));
}
return gain;
}
private static final IntVector BROAD_31 = IntVector.broadcast(INT_SPECIES,
31);
private static final IntVector BROAD_32 = IntVector.broadcast(INT_SPECIES,
32);
private static final IntVector BROAD_24 = IntVector.broadcast(INT_SPECIES,
24);
private static final FloatVector BROAD_0F =
FloatVector.broadcast(FLOAT_SPECIES, 0);
private Vector<Float> fastLog2(IntVector intVector) {
VectorMask<Integer> mask = intVector.test(VectorOperators.IS_DEFAULT);
IntVector floorLog2 =
BROAD_31.sub(intVector.lanewise(VectorOperators.LEADING_ZEROS_COUNT));
IntVector tableIndices = intVector
.lanewise(VectorOperators.LSHL, BROAD_32.sub(floorLog2))
.lanewise(VectorOperators.LSHR, BROAD_24);
tableIndices.intoArray(indicesBuffer, 0);
return floorLog2.convert(VectorOperators.I2F, 0)
.add(FloatVector.fromArray(FLOAT_SPECIES, LOG2_TABLE, 0,
indicesBuffer, 0))
.blend(BROAD_0F, mask.cast(FLOAT_SPECIES));
}
static float fastLog2(int i) {
assert i > 0 : "Cannot compute log of i=" + i;
int floorLog2 = 31 - Integer.numberOfLeadingZeros(i);
int tableIndex = i << (32 - floorLog2) >>> (32 - 8);
return floorLog2 + LOG2_TABLE[tableIndex];
}
}
```
</details>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]