ErickErickson commented on a change in pull request #705: URL: https://github.com/apache/solr/pull/705#discussion_r819190237
########## File path: solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java ########## @@ -198,104 +189,113 @@ public long ramBytesUsed() { public DocTermOrds(LeafReader reader, Bits liveDocs, String field) throws IOException { this(reader, liveDocs, field, null, Integer.MAX_VALUE); } - + // TODO: instead of all these ctors and options, take termsenum! /** Inverts only terms starting w/ prefix */ - public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix) throws IOException { + public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix) + throws IOException { this(reader, liveDocs, field, termPrefix, Integer.MAX_VALUE); } - /** Inverts only terms starting w/ prefix, and only terms - * whose docFreq (not taking deletions into account) is - * <= maxTermDocFreq */ - public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix, int maxTermDocFreq) throws IOException { + /** + * Inverts only terms starting w/ prefix, and only terms whose docFreq (not taking deletions into + * account) is <= maxTermDocFreq + */ + public DocTermOrds( + LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix, int maxTermDocFreq) + throws IOException { this(reader, liveDocs, field, termPrefix, maxTermDocFreq, DEFAULT_INDEX_INTERVAL_BITS); } - /** Inverts only terms starting w/ prefix, and only terms - * whose docFreq (not taking deletions into account) is - * <= maxTermDocFreq, with a custom indexing interval - * (default is every 128nd term). */ - public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix, int maxTermDocFreq, int indexIntervalBits) throws IOException { + /** + * Inverts only terms starting w/ prefix, and only terms whose docFreq (not taking deletions into + * account) is <= maxTermDocFreq, with a custom indexing interval (default is every 128nd + * term). + */ + public DocTermOrds( + LeafReader reader, + Bits liveDocs, + String field, + BytesRef termPrefix, + int maxTermDocFreq, + int indexIntervalBits) + throws IOException { this(field, maxTermDocFreq, indexIntervalBits); uninvert(reader, liveDocs, termPrefix); } - /** Subclass inits w/ this, but be sure you then call - * uninvert, only once */ + /** Subclass inits w/ this, but be sure you then call uninvert, only once */ protected DocTermOrds(String field, int maxTermDocFreq, int indexIntervalBits) { - //System.out.println("DTO init field=" + field + " maxTDFreq=" + maxTermDocFreq); + // System.out.println("DTO init field=" + field + " maxTDFreq=" + maxTermDocFreq); this.field = field; this.maxTermDocFreq = maxTermDocFreq; this.indexIntervalBits = indexIntervalBits; - indexIntervalMask = 0xffffffff >>> (32-indexIntervalBits); + indexIntervalMask = 0xffffffff >>> (32 - indexIntervalBits); indexInterval = 1 << indexIntervalBits; } - /** + /** * Returns a TermsEnum that implements ord, or null if no terms in field. - * <p> - * we build a "private" terms - * index internally (WARNING: consumes RAM) and use that - * index to implement ord. This also enables ord on top - * of a composite reader. The returned TermsEnum is - * unpositioned. This returns null if there are no terms. - * </p> - * <p><b>NOTE</b>: you must pass the same reader that was - * used when creating this class + * + * <p>we build a "private" terms index internally (WARNING: consumes RAM) and use that index to + * implement ord. This also enables ord on top of a composite reader. The returned TermsEnum is + * unpositioned. 
This returns null if there are no terms. + * + * <p><b>NOTE</b>: you must pass the same reader that was used when creating this class */ public TermsEnum getOrdTermsEnum(LeafReader reader) throws IOException { // NOTE: see LUCENE-6529 before attempting to optimize this method to // return a TermsEnum directly from the reader if it already supports ord(). assert null != indexedTermsArray; - + if (0 == indexedTermsArray.length) { return null; } else { return new OrdWrappedTermsEnum(reader); } } - /** - * Returns the number of terms in this field - */ + /** Returns the number of terms in this field */ public int numTerms() { return numTermsInField; } - /** - * Returns {@code true} if no terms were indexed. - */ + /** Returns {@code true} if no terms were indexed. */ public boolean isEmpty() { return index == null; } /** Subclass can override this */ - protected void visitTerm(TermsEnum te, int termNum) throws IOException { - } + protected void visitTerm(TermsEnum te, int termNum) throws IOException {} - /** Invoked during {@link #uninvert(org.apache.lucene.index.LeafReader,Bits,BytesRef)} - * to record the document frequency for each uninverted - * term. */ - protected void setActualDocFreq(int termNum, int df) throws IOException { - } + /** + * Invoked during {@link #uninvert(org.apache.lucene.index.LeafReader,Bits,BytesRef)} to record + * the document frequency for each uninverted term. + */ + protected void setActualDocFreq(int termNum, int df) throws IOException {} /** Call this only once (if you subclass!) */ - protected void uninvert(final LeafReader reader, Bits liveDocs, final BytesRef termPrefix) throws IOException { + protected void uninvert(final LeafReader reader, Bits liveDocs, final BytesRef termPrefix) + throws IOException { final FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (checkForDocValues && info != null && info.getDocValuesType() != DocValuesType.NONE) { - throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + throw new IllegalStateException( + "Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); } - //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix); + // System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix); final long startTime = System.nanoTime(); prefix = termPrefix == null ? null : BytesRef.deepCopyOf(termPrefix); final int maxDoc = reader.maxDoc(); - final int[] index = new int[maxDoc]; // immediate term numbers, or the index into the byte[] representing the last number - final int[] lastTerm = new int[maxDoc]; // last term we saw for this document - final byte[][] bytes = new byte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts) + final int[] index = Review comment: These comments got broken weirdly ########## File path: solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateProcessorFactory.java ########## @@ -170,11 +173,14 @@ private void processAddWithRetry(AddUpdateCommand cmd, int attempts, SolrInputDo // if lastVersion is null then we put -1 to assert that document must not exist lastVersion = lastVersion == null ? -1 : lastVersion; - // The AtomicUpdateDocumentMerger modifies the AddUpdateCommand.solrDoc to populate the real values of the - // modified fields. 
We don't want those absolute values because they are out-of-date due to the conflict - // so we restore the original document created in processAdd method and set the right version on it + // The AtomicUpdateDocumentMerger modifies the AddUpdateCommand.solrDoc to populate the + // real values of the modified fields. We don't want those absolute values because they + // are out-of-date due to the conflict so we restore the original document created in + // processAdd method and set the right version on it cmd.solrDoc = clonedOriginalDoc; - clonedOriginalDoc = clonedOriginalDoc.deepCopy(); // copy again because the old cloned ref will be modified during processAdd + // copy again because the old cloned ref will be modified during + clonedOriginalDoc = clonedOriginalDoc.deepCopy(); + // processAdd Review comment: move to comment above ########## File path: solr/core/src/java/org/apache/solr/analysis/LowerCaseTokenizer.java ########## @@ -106,34 +101,38 @@ public final boolean incrementToken() throws IOException { dataLen = ioBuffer.getLength(); bufferIndex = 0; } - // use CharacterUtils here to support < 3.1 UTF-16 code unit behavior if the char based methods are gone + // use CharacterUtils here to support < 3.1 UTF-16 code unit behavior if the char based + // methods are gone final int c = Character.codePointAt(ioBuffer.getBuffer(), bufferIndex, ioBuffer.getLength()); final int charCount = Character.charCount(c); bufferIndex += charCount; - if (Character.isLetter(c)) { // if it's a token char - if (length == 0) { // start of token + if (Character.isLetter(c)) { // if it's a token char + if (length == 0) { // start of token assert start == -1; start = offset + bufferIndex - charCount; end = start; - } else if (length >= buffer.length-1) { // check if a supplementary could run out of bounds - buffer = termAtt.resizeBuffer(2+length); // make sure a supplementary fits in the buffer + } else if (length + >= buffer.length - 1) { // check if a supplementary could run out of bounds + buffer = termAtt.resizeBuffer(2 + length); // make sure a supplementary fits in the buffer } end += charCount; - length += Character.toChars(Character.toLowerCase(c), buffer, length); // buffer it, normalized - if (length >= maxTokenLen) { // buffer overflow! make sure to check for >= surrogate pair could break == test + length += + Character.toChars(Character.toLowerCase(c), buffer, length); // buffer it, normalized + if (length + >= maxTokenLen) { // buffer overflow! make sure to check for >= surrogate pair could + // break == test Review comment: move entire comment above if? 
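For illustration, a minimal compilable sketch of the placement suggested above — the whole comment hoisted onto its own line above the if — using hypothetical names (CommentPlacementSketch, MAX_TOKEN_LEN, clampTokenLength) since the real tokenizer state isn't shown in this hunk:

    class CommentPlacementSketch {
      private static final int MAX_TOKEN_LEN = 255; // hypothetical limit

      int clampTokenLength(int length) {
        // buffer overflow! check with >= because a surrogate pair could break an == test
        if (length >= MAX_TOKEN_LEN) {
          return MAX_TOKEN_LEN; // stand-in for the real overflow handling
        }
        return length;
      }
    }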
########## File path: solr/core/src/java/org/apache/solr/search/JoinQuery.java ########## @@ -192,18 +197,19 @@ public boolean isCacheable(LeafReaderContext ctx) { } // most of these statistics are only used for the enum method - int fromSetSize; // number of docs in the fromSet (that match the from query) - long resultListDocs; // total number of docs collected + int fromSetSize; // number of docs in the fromSet (that match the from query) + long resultListDocs; // total number of docs collected int fromTermCount; long fromTermTotalDf; - int fromTermDirectCount; // number of fromTerms that were too small to use the filter cache - int fromTermHits; // number of fromTerms that intersected the from query + int fromTermDirectCount; // number of fromTerms that were too small to use the filter cache + int fromTermHits; // number of fromTerms that intersected the from query long fromTermHitsTotalDf; // sum of the df of the matching terms - int toTermHits; // num if intersecting from terms that match a term in the to field - long toTermHitsTotalDf; // sum of the df for the toTermHits - int toTermDirectCount; // number of toTerms that we set directly on a bitset rather than doing set intersections - int smallSetsDeferred; // number of small sets collected to be used later to intersect w/ bitset or create another small set - + int toTermHits; // num if intersecting from terms that match a term in the to field + long toTermHitsTotalDf; // sum of the df for the toTermHits + int toTermDirectCount; // number of toTerms that we set directly on a bitset rather than doing + // set intersections + int smallSetsDeferred; // number of small sets collected to be used later to intersect w/ bitset + // or create another small set Review comment: Move this to above declaration ########## File path: solr/core/src/java/org/apache/solr/search/facet/FacetMerger.java ########## @@ -24,35 +26,37 @@ import java.util.IdentityHashMap; import java.util.Map; -import static org.apache.solr.search.facet.FacetRequest.RefineMethod.SIMPLE; - - public abstract class FacetMerger { public abstract void merge(Object facetResult, Context mcontext); // FIXME // public abstract Map<String,Object> getRefinement(Context mcontext); - public Map<String,Object> getRefinement(Context mcontext) { + public Map<String, Object> getRefinement(Context mcontext) { return null; } + public abstract void finish(Context mcontext); - public abstract Object getMergedResult(); // TODO: we should pass mcontext through here as well + + public abstract Object getMergedResult(); // TODO: we should pass mcontext through here as well // This class lets mergers know overall context such as what shard is being merged // and what buckets have been seen by what shard. public static class Context { // FacetComponentState state; // todo: is this needed? final int numShards; - private final BitSet sawShard = new BitSet(); // [bucket0_shard0, bucket0_shard1, bucket0_shard2, bucket1_shard0, bucket1_shard1, bucket1_shard2] - private Map<String,Integer> shardmap = new HashMap<>(); + private final BitSet sawShard = + new BitSet(); // [bucket0_shard0, bucket0_shard1, bucket0_shard2, bucket1_shard0, Review comment: move entire comment above declaration ########## File path: solr/core/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java ########## @@ -250,26 +253,30 @@ DocumentAnalysisRequest resolveAnalysisRequest(SolrQueryRequest req) throws IOEx /** * Reads the document from the given xml stream reader. 
The following document format is expected: - * <p/> + * + * <p> Review comment: remove ########## File path: solr/core/src/java/org/apache/solr/util/hll/BitVector.java ########## @@ -19,242 +19,268 @@ import org.apache.solr.util.LongIterator; /** - * A vector (array) of bits that is accessed in units ("registers") of <code>width</code> - * bits which are stored as 64bit "words" (<code>long</code>s). In this context - * a register is at most 64bits. + * A vector (array) of bits that is accessed in units ("registers") of <code>width</code> bits which + * are stored as 64bit "words" (<code>long</code>s). In this context a register is at most 64bits. */ class BitVector implements Cloneable { - // NOTE: in this context, a word is 64bits - - // rather than doing division to determine how a bit index fits into 64bit - // words (i.e. longs), bit shifting is used - private static final int LOG2_BITS_PER_WORD = 6/*=>64bits*/; - private static final int BITS_PER_WORD = 1 << LOG2_BITS_PER_WORD; - private static final int BITS_PER_WORD_MASK = BITS_PER_WORD - 1; - - // ditto from above but for bytes (for output) - private static final int LOG2_BITS_PER_BYTE = 3/*=>8bits*/; - public static final int BITS_PER_BYTE = 1 << LOG2_BITS_PER_BYTE; - - // ======================================================================== - public static final int BYTES_PER_WORD = 8/*8 bytes in a long*/; - - // ************************************************************************ - // 64bit words - private final long[] words; - public final long[] words() { return words; } - public final int wordCount() { return words.length; } - public final int byteCount() { return wordCount() * BYTES_PER_WORD; } - - // the width of a register in bits (this cannot be more than 64 (the word size)) - private final int registerWidth; - public final int registerWidth() { return registerWidth; } - - private final long count; - - // ------------------------------------------------------------------------ - private final long registerMask; - - // ======================================================================== - /** - * @param width the width of each register. This cannot be negative or - * zero or greater than 63 (the signed word size). - * @param count the number of registers. This cannot be negative or zero - */ - public BitVector(final int width, final long count) { - // ceil((width * count)/BITS_PER_WORD) - this.words = new long[(int)(((width * count) + BITS_PER_WORD_MASK) >>> LOG2_BITS_PER_WORD)]; - this.registerWidth = width; - this.count = count; - - this.registerMask = (1L << width) - 1; - } + // NOTE: in this context, a word is 64bits - // ======================================================================== - /** - * @param registerIndex the index of the register whose value is to be - * retrieved. This cannot be negative. 
- * @return the value at the specified register index - * @see #setRegister(long, long) - * @see #setMaxRegister(long, long) - */ - // NOTE: if this changes then setMaxRegister() must change - public long getRegister(final long registerIndex) { - final long bitIndex = registerIndex * registerWidth; - final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/; - final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/; - final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/; - - if(firstWordIndex == secondWordIndex) - return ((words[firstWordIndex] >>> bitRemainder) & registerMask); - /* else -- register spans words */ - return (words[firstWordIndex] >>> bitRemainder)/*no need to mask since at top of word*/ - | (words[secondWordIndex] << (BITS_PER_WORD - bitRemainder)) & registerMask; - } + // rather than doing division to determine how a bit index fits into 64bit + // words (i.e. longs), bit shifting is used + private static final int LOG2_BITS_PER_WORD = 6 /*=>64bits*/; + private static final int BITS_PER_WORD = 1 << LOG2_BITS_PER_WORD; + private static final int BITS_PER_WORD_MASK = BITS_PER_WORD - 1; - /** - * @param registerIndex the index of the register whose value is to be set. - * This cannot be negative - * @param value the value to set in the register - * @see #getRegister(long) - * @see #setMaxRegister(long, long) - */ - // NOTE: if this changes then setMaxRegister() must change - public void setRegister(final long registerIndex, final long value) { - final long bitIndex = registerIndex * registerWidth; - final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/; - final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/; - final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/; - - final long words[] = this.words/*for convenience/performance*/; - if(firstWordIndex == secondWordIndex) { - // clear then set - words[firstWordIndex] &= ~(registerMask << bitRemainder); - words[firstWordIndex] |= (value << bitRemainder); - } else {/*register spans words*/ - // clear then set each partial word - words[firstWordIndex] &= (1L << bitRemainder) - 1; - words[firstWordIndex] |= (value << bitRemainder); - - words[secondWordIndex] &= ~(registerMask >>> (BITS_PER_WORD - bitRemainder)); - words[secondWordIndex] |= (value >>> (BITS_PER_WORD - bitRemainder)); - } - } + // ditto from above but for bytes (for output) + private static final int LOG2_BITS_PER_BYTE = 3 /*=>8bits*/; + public static final int BITS_PER_BYTE = 1 << LOG2_BITS_PER_BYTE; - // ------------------------------------------------------------------------ - /** - * @return a <code>LongIterator</code> for iterating starting at the register - * with index zero. This will never be <code>null</code>. 
- */ - public LongIterator registerIterator() { - return new LongIterator() { - final int registerWidth = BitVector.this.registerWidth; - final long[] words = BitVector.this.words; - final long registerMask = BitVector.this.registerMask; - - // register setup - long registerIndex = 0; - int wordIndex = 0; - int remainingWordBits = BITS_PER_WORD; - long word = words[wordIndex]; - - @Override public long next() { - long register; - if(remainingWordBits >= registerWidth) { - register = word & registerMask; - - // shift to the next register - word >>>= registerWidth; - remainingWordBits -= registerWidth; - } else { /*insufficient bits remaining in current word*/ - wordIndex++/*move to the next word*/; - - register = (word | (words[wordIndex] << remainingWordBits)) & registerMask; - - // shift to the next partial register (word) - word = words[wordIndex] >>> (registerWidth - remainingWordBits); - remainingWordBits += BITS_PER_WORD - registerWidth; - } - registerIndex++; - return register; - } - - @Override public boolean hasNext() { - return registerIndex < count; - } - }; - } + // ======================================================================== + public static final int BYTES_PER_WORD = 8 /*8 bytes in a long*/; - // ------------------------------------------------------------------------ - // composite accessors - /** - * Sets the value of the specified index register if and only if the specified - * value is greater than the current value in the register. This is equivalent - * to but much more performant than:<p/> - * - * <pre>vector.setRegister(index, Math.max(vector.getRegister(index), value));</pre> - * - * @param registerIndex the index of the register whose value is to be set. - * This cannot be negative - * @param value the value to set in the register if and only if this value - * is greater than the current value in the register - * @return <code>true</code> if and only if the specified value is greater - * than or equal to the current register value. <code>false</code> - * otherwise. 
- * @see #getRegister(long) - * @see #setRegister(long, long) - * @see java.lang.Math#max(long, long) - */ - // NOTE: if this changes then setRegister() must change - public boolean setMaxRegister(final long registerIndex, final long value) { - final long bitIndex = registerIndex * registerWidth; - final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/; - final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/; - final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/; - - // NOTE: matches getRegister() - final long registerValue; - final long words[] = this.words/*for convenience/performance*/; - if(firstWordIndex == secondWordIndex) - registerValue = ((words[firstWordIndex] >>> bitRemainder) & registerMask); - else /*register spans words*/ - registerValue = (words[firstWordIndex] >>> bitRemainder)/*no need to mask since at top of word*/ - | (words[secondWordIndex] << (BITS_PER_WORD - bitRemainder)) & registerMask; - - // determine which is the larger and update as necessary - if(value > registerValue) { - // NOTE: matches setRegister() - if(firstWordIndex == secondWordIndex) { - // clear then set - words[firstWordIndex] &= ~(registerMask << bitRemainder); - words[firstWordIndex] |= (value << bitRemainder); - } else {/*register spans words*/ - // clear then set each partial word - words[firstWordIndex] &= (1L << bitRemainder) - 1; - words[firstWordIndex] |= (value << bitRemainder); - - words[secondWordIndex] &= ~(registerMask >>> (BITS_PER_WORD - bitRemainder)); - words[secondWordIndex] |= (value >>> (BITS_PER_WORD - bitRemainder)); - } - } /* else -- the register value is greater (or equal) so nothing needs to be done */ - - return (value >= registerValue); - } + // ************************************************************************ + // 64bit words + private final long[] words; - // ======================================================================== - /** - * Fills this bit vector with the specified bit value. This can be used to - * clear the vector by specifying <code>0</code>. - * - * @param value the value to set all bits to (only the lowest bit is used) - */ - public void fill(final long value) { - for(long i=0; i<count; i++) { - setRegister(i, value); - } + public final long[] words() { + return words; + } + + public final int wordCount() { + return words.length; + } + + public final int byteCount() { + return wordCount() * BYTES_PER_WORD; + } + + // the width of a register in bits (this cannot be more than 64 (the word size)) + private final int registerWidth; + + public final int registerWidth() { + return registerWidth; + } + + private final long count; + + // ------------------------------------------------------------------------ + private final long registerMask; + + // ======================================================================== + /** + * @param width the width of each register. This cannot be negative or zero or greater than 63 + * (the signed word size). + * @param count the number of registers. 
This cannot be negative or zero + */ + public BitVector(final int width, final long count) { + // ceil((width * count)/BITS_PER_WORD) + this.words = new long[(int) (((width * count) + BITS_PER_WORD_MASK) >>> LOG2_BITS_PER_WORD)]; + this.registerWidth = width; + this.count = count; + + this.registerMask = (1L << width) - 1; + } + + // ======================================================================== + /** + * @param registerIndex the index of the register whose value is to be retrieved. This cannot be + * negative. + * @return the value at the specified register index + * @see #setRegister(long, long) + * @see #setMaxRegister(long, long) + */ + // NOTE: if this changes then setMaxRegister() must change + public long getRegister(final long registerIndex) { + final long bitIndex = registerIndex * registerWidth; + final int firstWordIndex = + (int) (bitIndex >>> LOG2_BITS_PER_WORD) /*aka (bitIndex / BITS_PER_WORD)*/; + final int secondWordIndex = + (int) ((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD) /*see above*/; + final int bitRemainder = + (int) (bitIndex & BITS_PER_WORD_MASK) /*aka (bitIndex % BITS_PER_WORD)*/; + + if (firstWordIndex == secondWordIndex) + return ((words[firstWordIndex] >>> bitRemainder) & registerMask); + /* else -- register spans words */ + return (words[firstWordIndex] >>> bitRemainder) /*no need to mask since at top of word*/ + | (words[secondWordIndex] << (BITS_PER_WORD - bitRemainder)) & registerMask; + } + + /** + * @param registerIndex the index of the register whose value is to be set. This cannot be + * negative + * @param value the value to set in the register + * @see #getRegister(long) + * @see #setMaxRegister(long, long) + */ + // NOTE: if this changes then setMaxRegister() must change + public void setRegister(final long registerIndex, final long value) { + final long bitIndex = registerIndex * registerWidth; + final int firstWordIndex = + (int) (bitIndex >>> LOG2_BITS_PER_WORD) /*aka (bitIndex / BITS_PER_WORD)*/; + final int secondWordIndex = + (int) ((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD) /*see above*/; + final int bitRemainder = + (int) (bitIndex & BITS_PER_WORD_MASK) /*aka (bitIndex % BITS_PER_WORD)*/; + + final long words[] = this.words /*for convenience/performance*/; + if (firstWordIndex == secondWordIndex) { + // clear then set + words[firstWordIndex] &= ~(registerMask << bitRemainder); + words[firstWordIndex] |= (value << bitRemainder); + } else { + /*register spans words*/ + // clear then set each partial word + words[firstWordIndex] &= (1L << bitRemainder) - 1; + words[firstWordIndex] |= (value << bitRemainder); + + words[secondWordIndex] &= ~(registerMask >>> (BITS_PER_WORD - bitRemainder)); + words[secondWordIndex] |= (value >>> (BITS_PER_WORD - bitRemainder)); } + } + + // ------------------------------------------------------------------------ + /** + * @return a <code>LongIterator</code> for iterating starting at the register with index zero. + * This will never be <code>null</code>. 
+ */ + public LongIterator registerIterator() { + return new LongIterator() { + final int registerWidth = BitVector.this.registerWidth; + final long[] words = BitVector.this.words; + final long registerMask = BitVector.this.registerMask; + + // register setup + long registerIndex = 0; + int wordIndex = 0; + int remainingWordBits = BITS_PER_WORD; + long word = words[wordIndex]; + + @Override + public long next() { + long register; + if (remainingWordBits >= registerWidth) { + register = word & registerMask; + + // shift to the next register + word >>>= registerWidth; + remainingWordBits -= registerWidth; + } else { + /*insufficient bits remaining in current word*/ + wordIndex++ /*move to the next word*/; + + register = (word | (words[wordIndex] << remainingWordBits)) & registerMask; - // ------------------------------------------------------------------------ - /** - * Serializes the registers of the vector using the specified serializer. - * - * @param serializer the serializer to use. This cannot be <code>null</code>. - */ - public void getRegisterContents(final IWordSerializer serializer) { - for(final LongIterator iter = registerIterator(); iter.hasNext();) { - serializer.writeWord(iter.next()); + // shift to the next partial register (word) + word = words[wordIndex] >>> (registerWidth - remainingWordBits); + remainingWordBits += BITS_PER_WORD - registerWidth; } + registerIndex++; + return register; + } + + @Override + public boolean hasNext() { + return registerIndex < count; + } + }; + } + + // ------------------------------------------------------------------------ + // composite accessors + /** + * Sets the value of the specified index register if and only if the specified value is greater + * than the current value in the register. This is equivalent to but much more performant than: + * + * <p> Review comment: remove ########## File path: solr/core/src/java/org/apache/solr/cloud/api/collections/AliasCmd.java ########## @@ -51,10 +51,17 @@ protected AliasCmd(CollectionCommandContext ccc) { /** * Creates a collection (for use in a routed alias), waiting for it to be ready before returning. - * If the collection already exists then this is not an error.<p> + * If the collection already exists then this is not an error. + * + * <p> Review comment: remove ########## File path: solr/core/src/java/org/apache/solr/search/facet/FacetMerger.java ########## @@ -24,35 +26,37 @@ import java.util.IdentityHashMap; import java.util.Map; -import static org.apache.solr.search.facet.FacetRequest.RefineMethod.SIMPLE; - - public abstract class FacetMerger { public abstract void merge(Object facetResult, Context mcontext); // FIXME // public abstract Map<String,Object> getRefinement(Context mcontext); - public Map<String,Object> getRefinement(Context mcontext) { + public Map<String, Object> getRefinement(Context mcontext) { return null; } + public abstract void finish(Context mcontext); - public abstract Object getMergedResult(); // TODO: we should pass mcontext through here as well + + public abstract Object getMergedResult(); // TODO: we should pass mcontext through here as well // This class lets mergers know overall context such as what shard is being merged // and what buckets have been seen by what shard. public static class Context { // FacetComponentState state; // todo: is this needed? 
final int numShards; - private final BitSet sawShard = new BitSet(); // [bucket0_shard0, bucket0_shard1, bucket0_shard2, bucket1_shard0, bucket1_shard1, bucket1_shard2] - private Map<String,Integer> shardmap = new HashMap<>(); + private final BitSet sawShard = + new BitSet(); // [bucket0_shard0, bucket0_shard1, bucket0_shard2, bucket1_shard0, + // bucket1_shard1, bucket1_shard2] + private Map<String, Integer> shardmap = new HashMap<>(); public Context(int numShards) { this.numShards = numShards; } - Object root; // per-shard response - int maxBucket; // the current max bucket across all bucket types... incremented as we encounter more - int shardNum = -1; // TODO: keep same mapping across multiple phases... + Object root; // per-shard response + int maxBucket; // the current max bucket across all bucket types... incremented as we encounter + // more Review comment: move entire comment above declaration ########## File path: solr/core/src/java/org/apache/solr/search/Grouping.java ########## @@ -464,67 +481,59 @@ public static int getMax(int offset, int len, int max) { } /** - * Returns whether a cache warning should be send to the client. - * The value <code>true</code> is returned when the cache is emptied because the caching limits where met, otherwise - * <code>false</code> is returned. + * Returns whether a cache warning should be send to the client. The value <code>true</code> is + * returned when the cache is emptied because the caching limits where met, otherwise <code>false + * </code> is returned. * * @return whether a cache warning should be send to the client */ public boolean isSignalCacheWarning() { return signalCacheWarning; } - //====================================== Inner classes ============================================================= + // ===== Inner classes ===== public static enum Format { - /** - * Grouped result. Each group has its own result set. - */ + /** Grouped result. Each group has its own result set. */ grouped, - /** - * Flat result. All documents of all groups are put in one list. - */ + /** Flat result. All documents of all groups are put in one list. */ simple } public static enum TotalCount { - /** - * Computations should be based on groups. - */ + /** Computations should be based on groups. */ grouped, - /** - * Computations should be based on plain documents, so not taking grouping into account. - */ + /** Computations should be based on plain documents, so not taking grouping into account. */ ungrouped } /** - * General group command. A group command is responsible for creating the first and second pass collectors. - * A group command is also responsible for creating the response structure. - * <p> - * Note: Maybe the creating the response structure should be done in something like a ReponseBuilder??? - * Warning NOT thread save! + * General group command. A group command is responsible for creating the first and second pass + * collectors. A group command is also responsible for creating the response structure. + * + * <p>Note: Maybe the creating the response structure should be done in something like a + * ReponseBuilder??? Warning NOT thread save! */ public abstract class Command<T> { - public String key; // the name to use for this group in the response - public Sort withinGroupSort; // the sort of the documents *within* a single group. - public Sort groupSort; // the sort between groups + public String key; // the name to use for this group in the response + public Sort withinGroupSort; // the sort of the documents *within* a single group. 
+ public Sort groupSort; // the sort between groups public int docsPerGroup; // how many docs in each group - from "group.limit" param, default=1 - public int groupOffset; // the offset within each group (for paging within each group) - public int numGroups; // how many groups - defaults to the "rows" parameter - int actualGroupsToFind; // How many groups should actually be found. Based on groupOffset and numGroups. - public int offset; // offset into the list of groups + public int groupOffset; // the offset within each group (for paging within each group) + public int numGroups; // how many groups - defaults to the "rows" parameter + int actualGroupsToFind; // How many groups should actually be found. Based on groupOffset and + // numGroups. Review comment: combine with comment above. ########## File path: solr/core/src/java/org/apache/solr/search/JoinQuery.java ########## @@ -192,18 +197,19 @@ public boolean isCacheable(LeafReaderContext ctx) { } // most of these statistics are only used for the enum method - int fromSetSize; // number of docs in the fromSet (that match the from query) - long resultListDocs; // total number of docs collected + int fromSetSize; // number of docs in the fromSet (that match the from query) + long resultListDocs; // total number of docs collected int fromTermCount; long fromTermTotalDf; - int fromTermDirectCount; // number of fromTerms that were too small to use the filter cache - int fromTermHits; // number of fromTerms that intersected the from query + int fromTermDirectCount; // number of fromTerms that were too small to use the filter cache + int fromTermHits; // number of fromTerms that intersected the from query long fromTermHitsTotalDf; // sum of the df of the matching terms - int toTermHits; // num if intersecting from terms that match a term in the to field - long toTermHitsTotalDf; // sum of the df for the toTermHits - int toTermDirectCount; // number of toTerms that we set directly on a bitset rather than doing set intersections - int smallSetsDeferred; // number of small sets collected to be used later to intersect w/ bitset or create another small set - + int toTermHits; // num if intersecting from terms that match a term in the to field + long toTermHitsTotalDf; // sum of the df for the toTermHits + int toTermDirectCount; // number of toTerms that we set directly on a bitset rather than doing + // set intersections Review comment: Move this to above declaration ########## File path: solr/core/src/java/org/apache/solr/cloud/OverseerCollectionConfigSetProcessor.java ########## @@ -87,10 +98,20 @@ private static OverseerMessageHandlerSelector getOverseerMessageHandlerSelector( Stats stats, Overseer overseer, OverseerNodePrioritizer overseerNodePrioritizer) { - final OverseerCollectionMessageHandler collMessageHandler = new OverseerCollectionMessageHandler( - zkStateReader, myId, shardHandlerFactory, adminPath, stats, overseer, overseerNodePrioritizer); - final OverseerConfigSetMessageHandler configMessageHandler = new OverseerConfigSetMessageHandler( - zkStateReader, overseer.getCoreContainer()); //coreContainer is passed instead of configSetService as configSetService is loaded late + final OverseerCollectionMessageHandler collMessageHandler = + new OverseerCollectionMessageHandler( + zkStateReader, + myId, + shardHandlerFactory, + adminPath, + stats, + overseer, + overseerNodePrioritizer); + final OverseerConfigSetMessageHandler configMessageHandler = + new OverseerConfigSetMessageHandler( + zkStateReader, + overseer.getCoreContainer()); // 
coreContainer is passed instead of configSetService as + // configSetService is loaded late Review comment: combine these comments above? ########## File path: solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java ########## @@ -74,218 +71,226 @@ public String getContentType(SolrQueryRequest request, SolrQueryResponse respons return contentType; } - public static PushWriter getPushWriter(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) { + public static PushWriter getPushWriter( + Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) { return new JSONWriter(writer, req, rsp); } - - -/** - * Writes NamedLists directly as an array of NameTypeValue JSON objects... - * NamedList("a"=1,"b"=null,null=3,null=null) => - * [{"name":"a","type":"int","value":1}, - * {"name":"b","type":"null","value":null}, - * {"name":null,"type":"int","value":3}, - * {"name":null,"type":"null","value":null}] - * NamedList("a"=1,"bar"="foo",null=3.4f) => - * [{"name":"a","type":"int","value":1}, - * {"name":"bar","type":"str","value":"foo"}, - * {"name":null,"type":"float","value":3.4}] - */ -class ArrayOfNameTypeValueJSONWriter extends JSONWriter { - protected boolean writeTypeAndValueKey = false; - private final boolean writeNullName; - - public ArrayOfNameTypeValueJSONWriter(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp, - String wrapperFunction, String namedListStyle, boolean writeNullName) { - super(writer, req, rsp, wrapperFunction, namedListStyle); - this.writeNullName = writeNullName; - } - - @Override - public void writeNamedList(String name, NamedList<?> val) throws IOException { - - if (val instanceof SimpleOrderedMap) { - super.writeNamedList(name, val); - return; + /** + * Writes NamedLists directly as an array of NameTypeValue JSON objects... + * NamedList("a"=1,"b"=null,null=3,null=null) => [{"name":"a","type":"int","value":1}, + * {"name":"b","type":"null","value":null}, {"name":null,"type":"int","value":3}, + * {"name":null,"type":"null","value":null}] NamedList("a"=1,"bar"="foo",null=3.4f) => + * [{"name":"a","type":"int","value":1}, {"name":"bar","type":"str","value":"foo"}, + * {"name":null,"type":"float","value":3.4}] + */ + class ArrayOfNameTypeValueJSONWriter extends JSONWriter { + protected boolean writeTypeAndValueKey = false; + private final boolean writeNullName; + + public ArrayOfNameTypeValueJSONWriter( + Writer writer, + SolrQueryRequest req, + SolrQueryResponse rsp, + String wrapperFunction, + String namedListStyle, + boolean writeNullName) { + super(writer, req, rsp, wrapperFunction, namedListStyle); + this.writeNullName = writeNullName; } - final int sz = val.size(); - indent(); - - writeArrayOpener(sz); - incLevel(); + @Override + public void writeNamedList(String name, NamedList<?> val) throws IOException { - boolean first = true; - for (int i=0; i<sz; i++) { - if (first) { - first = false; - } else { - writeArraySeparator(); + if (val instanceof SimpleOrderedMap) { + super.writeNamedList(name, val); + return; } + final int sz = val.size(); indent(); - final String elementName = val.getName(i); - final Object elementVal = val.getVal(i); - - /* - * JSONWriter's writeNamedListAsArrMap turns NamedList("bar"="foo") into [{"foo":"bar"}] - * but we here wish to turn it into [ {"name":"bar","type":"str","value":"foo"} ] instead. - * - * So first we write the <code>{"name":"bar",</code> portion ... 
- */ - writeMapOpener(-1); - if (elementName != null || writeNullName) { - writeKey("name", false); - writeVal("name", elementName); - writeMapSeparator(); + writeArrayOpener(sz); + incLevel(); + + boolean first = true; + for (int i = 0; i < sz; i++) { + if (first) { + first = false; + } else { + writeArraySeparator(); + } + + indent(); + + final String elementName = val.getName(i); + final Object elementVal = val.getVal(i); + + /* + * JSONWriter's writeNamedListAsArrMap turns NamedList("bar"="foo") into [{"foo":"bar"}] + * but we here wish to turn it into [ {"name":"bar","type":"str","value":"foo"} ] instead. + * + * So first we write the <code>{"name":"bar",</code> portion ... + */ + writeMapOpener(-1); + if (elementName != null || writeNullName) { + writeKey("name", false); + writeVal("name", elementName); + writeMapSeparator(); + } + + /* + * ... and then we write the <code>"type":"str","value":"foo"}</code> portion. + */ + writeTypeAndValueKey = true; + writeVal( + null, + elementVal); // passing null since writeVal doesn't actually use name (and we already + // wrote elementName above) Review comment: awkward ########## File path: solr/core/src/java/org/apache/solr/servlet/CoreContainerProvider.java ########## @@ -147,21 +154,27 @@ private static void waitForCoreContainer(Supplier<CoreContainer> provider, Count public void close() { CoreContainer cc = cores; -// if (cc != null) { -// ZkController zkController = cc.getZkController(); -// if (zkController != null) { -// -// // Mark Miller suggested that we should be publishing that we are down before anything else which makes -// // good sense, but the following causes test failures, so that improvement can be the subject of another -// // PR/issue. Also, jetty might already be refusing requests by this point so that's a potential issue too. -// // Digging slightly I see that there's a whole mess of code looking up collections and calculating state -// // changes associated with this call, which smells a lot like we're duplicating node state in collection -// // stuff, but it will take a lot of code reading to figure out if that's really what it is, why we -// // did it and if there's room for improvement. -// -// zkController.publishNodeAsDown(zkController.getNodeName()); -// } -// } + // if (cc != null) { + // ZkController zkController = cc.getZkController(); + // if (zkController != null) { + // + // // Mark Miller suggested that we should be publishing that we are down before anything Review comment: This all got broken up strangely with a lot of dangling lines, tighten them up? 
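For illustration, one way to tighten those dangling lines is to rewrap the commented-out block at a fixed margin so no fragment stands alone. A sketch only: CloseSketch is a hypothetical stand-in for the surrounding class, and the note text is abbreviated.

    public class CloseSketch {
      public void close() {
        // if (cc != null) {
        //   ZkController zkController = cc.getZkController();
        //   if (zkController != null) {
        //     // Mark Miller suggested that we should be publishing that we are down
        //     // before anything else, which makes good sense, but the following
        //     // causes test failures, so that improvement can be the subject of
        //     // another PR/issue. ...
        //     zkController.publishNodeAsDown(zkController.getNodeName());
        //   }
        // }
      }
    }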
########## File path: solr/core/src/java/org/apache/solr/schema/ManagedIndexSchemaFactory.java ########## @@ -93,60 +101,71 @@ public void init(NamedList<?> args) { @Override public String getSchemaResourceName(String cdResourceName) { - return managedSchemaResourceName; // actually a guess; reality depends on the actual files in the config set :-( + return managedSchemaResourceName; // actually a guess; reality depends on the actual files in + // the config set :-( Review comment: combine comment fragments above return ########## File path: solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java ########## @@ -805,24 +999,33 @@ public static Slice getParentSlice(ClusterState clusterState, String collectionN } if (parentSlice == null) { - // no chance of the collection being null because ClusterState#getCollection(String) would have thrown + // no chance of the collection being null because ClusterState#getCollection(String) would + // have thrown Review comment: awkward split ########## File path: solr/core/src/java/org/apache/solr/cloud/overseer/CollectionMutator.java ########## @@ -109,27 +113,30 @@ public ZkWriteCommand modifyCollection(final ClusterState clusterState, ZkNodePr if (val == null) continue; boolean enable = Boolean.parseBoolean(val); if (enable == coll.isPerReplicaState()) { - //already enabled + // already enabled log.error("trying to set perReplicaState to {} from {}", val, coll.isPerReplicaState()); continue; } - replicaOps = PerReplicaStatesOps.modifyCollection(coll, enable, PerReplicaStates.fetch(coll.getZNode(), zkClient, null)); + replicaOps = + PerReplicaStatesOps.modifyCollection( + coll, enable, PerReplicaStates.fetch(coll.getZNode(), zkClient, null)); } - if (message.containsKey(prop)) { hasAnyOps = true; - if (message.get(prop) == null) { + if (message.get(prop) == null) { props.remove(prop); - } else { + } else { // rename key from collection.configName to configName if (prop.equals(COLL_CONF)) { props.put(CONFIGNAME_PROP, message.get(prop)); } else { props.put(prop, message.get(prop)); } } - if (prop == REPLICATION_FACTOR) { //SOLR-11676 : keep NRT_REPLICAS and REPLICATION_FACTOR in sync + // SOLR-11676 : keep NRT_REPLICAS and REPLICATION_FACTOR in + if (prop == REPLICATION_FACTOR) { + // sync Review comment: awkward comment split ########## File path: solr/core/src/java/org/apache/solr/search/facet/RelatednessAgg.java ########## @@ -184,12 +195,20 @@ public FacetMerger createFacetMerger(Object prototype) { // we can't get the allBuckets info from the slotContext in collect(), b/c the whole point of // sweep collection is that the "collect" methods aren't called. - // So this is the compromise: note in construction either that we're using a processor w/NO_ALL_BUCKETS + // So this is the compromise: note in construction either that we're using a processor + // w/NO_ALL_BUCKETS Review comment: tighten these ########## File path: solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java ########## @@ -564,9 +581,8 @@ public final void doSyncOrReplicateRecovery(SolrCore core) throws Exception { final String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName); Future<RecoveryInfo> replayFuture = null; - while (!successfulRecovery && !Thread.currentThread().isInterrupted() && !isClosed()) { // don't use interruption or - // it will close channels - // though + // don't use interruption or it will close channels though Review comment: put this above the while. 
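For illustration, a compilable sketch of the suggested placement, with hypothetical stub methods (isClosed, attemptRecovery) standing in for the real recovery-state checks:

    public class RecoveryLoopSketch {
      private boolean successfulRecovery = false;

      private boolean isClosed() {
        return false; // stub; the real check consults recovery-strategy state
      }

      private boolean attemptRecovery() {
        return true; // stub so the sketch terminates after one pass
      }

      public void run() {
        // don't use interruption or it will close channels though
        while (!successfulRecovery && !Thread.currentThread().isInterrupted() && !isClosed()) {
          successfulRecovery = attemptRecovery();
        }
      }
    }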
########## File path: solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java ########## @@ -332,27 +335,29 @@ final public void doRecovery(SolrCore core) throws Exception { } } - final private void doReplicateOnlyRecovery(SolrCore core) throws InterruptedException { + private final void doReplicateOnlyRecovery(SolrCore core) throws InterruptedException { final RTimer timer = new RTimer(); boolean successfulRecovery = false; // if (core.getUpdateHandler().getUpdateLog() != null) { - // SolrException.log(log, "'replicate-only' recovery strategy should only be used if no update logs are present, but + // SolrException.log(log, "'replicate-only' recovery strategy should only be used if no update + // logs are present, but // this core has one: " // + core.getUpdateHandler().getUpdateLog()); // return; // } - while (!successfulRecovery && !Thread.currentThread().isInterrupted() && !isClosed()) { // don't use interruption or - // it will close channels - // though + while (!successfulRecovery && !Thread.currentThread().isInterrupted() && !isClosed()) { + // don't use interruption or it will close channels though Review comment: put this above the while. ########## File path: solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java ########## @@ -254,40 +327,30 @@ protected void merge(MergePolicy.OneMerge merge) throws IOException { @Override protected void doAfterFlush() throws IOException { if (flushMeter != null) { // this is null when writer is used only for snapshot cleanup - flushMeter.mark(); // or if mergeTotals == false + flushMeter.mark(); // or if mergeTotals == false } super.doAfterFlush(); } /** - * use DocumentBuilder now... - * private final void addField(Document doc, String name, String val) { + * use DocumentBuilder now... private final void addField(Document doc, String name, String val) { * SchemaField ftype = schema.getField(name); - * <p/> - * // we don't check for a null val ourselves because a solr.FieldType - * // might actually want to map it to something. If createField() - * // returns null, then we don't store the field. - * <p/> - * Field field = ftype.createField(val, boost); - * if (field != null) doc.add(field); - * } - * <p/> - * <p/> - * public void addRecord(String[] fieldNames, String[] fieldValues) throws IOException { - * Document doc = new Document(); - * for (int i=0; i<fieldNames.length; i++) { - * String name = fieldNames[i]; - * String val = fieldNames[i]; - * <p/> - * // first null is end of list. client can reuse arrays if they want - * // and just write a single null if there is unused space. - * if (name==null) break; - * <p/> - * addField(doc,name,val); - * } - * addDocument(doc); - * } - * **** + * + * <p>// we don't check for a null val ourselves because a solr.FieldType // might actually want + * to map it to something. If createField() // returns null, then we don't store the field. + * + * <p>Field field = ftype.createField(val, boost); if (field != null) doc.add(field); } + * + * <p> Review comment: remove