dungba88 commented on code in PR #12738:
URL: https://github.com/apache/lucene/pull/12738#discussion_r1378841828
##########
lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java:
##########
@@ -328,7 +323,128 @@ private void rehash(long lastNodeAddress) throws
IOException {
}
mask = newMask;
- entries = newEntries;
+ fstHashAddress = newEntries;
+
+ PagedGrowableWriter newCopiedOffsets =
+ new PagedGrowableWriter(
+ newSize, BLOCK_SIZE_BYTES, PackedInts.bitsRequired(copiedBytes),
PackedInts.COMPACT);
+ PagedGrowableWriter newFSTOffsets =
+ new PagedGrowableWriter(
+ newSize,
+ BLOCK_SIZE_BYTES,
+ PackedInts.bitsRequired(lastNodeAddress),
+ PackedInts.COMPACT);
+ for (long idx = 0; idx < fstNodeAddress.size(); idx++) {
+ long address = fstNodeAddress.get(idx);
+ if (address != 0) {
+ long pos = Long.hashCode(address) & newMask;
+ while (true) {
+ if (newFSTOffsets.get(pos) == 0) {
+ newFSTOffsets.set(pos, address);
+ newCopiedOffsets.set(pos, copiedNodeAddress.get(idx));
+ break;
+ }
+
+ pos = (pos + 1) & newMask;
+ }
+ }
+ }
+
+ fstNodeAddress = newFSTOffsets;
+ copiedNodeAddress = newCopiedOffsets;
+ }
+
+ // hash code for a frozen node. this must precisely match the hash
computation of an unfrozen
+ // node!
+ private long hash(long node) throws IOException {
+ FST.BytesReader in = getBytesReader(node);
+
+ final int PRIME = 31;
+
+ long h = 0;
+ fstCompiler.fst.readFirstRealTargetArc(node, scratchArc, in);
+ while (true) {
+ h = PRIME * h + scratchArc.label();
+ h = PRIME * h + (int) (scratchArc.target() ^ (scratchArc.target() >>
32));
+ h = PRIME * h + scratchArc.output().hashCode();
+ h = PRIME * h + scratchArc.nextFinalOutput().hashCode();
+ if (scratchArc.isFinal()) {
+ h += 17;
+ }
+ if (scratchArc.isLast()) {
+ break;
+ }
+ fstCompiler.fst.readNextRealArc(scratchArc, in);
+ }
+
+ return h;
+ }
+
+ /**
+ * Compares an unfrozen node (UnCompiledNode) with a frozen node at byte
location address
+ * (long), returning the local copiedNodes start address if the two nodes
are matched, or -1
+ * otherwise
+ */
+ private int getMatchedNodeLength(FSTCompiler.UnCompiledNode<T> node, long
address)
Review Comment:
This method is actually the old `nodesEqual`, renamed (the original was removed), so there is
no duplication. The old behavior is essentially `getMatchedNodeLength(...) != -1`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]