jpountz commented on a change in pull request #464:
URL: https://github.com/apache/lucene/pull/464#discussion_r781979863



##########
File path: 
lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java
##########
@@ -0,0 +1,910 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.memory;
+
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.index.*;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.IOUtils;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Reader for {@link DirectDocValuesFormat}
+ */
+
+class DirectDocValuesProducer extends DocValuesProducer {
+  static final byte NUMBER = 0;
+  static final byte BYTES = 1;
+  static final byte SORTED = 2;
+  static final byte SORTED_SET = 3;
+  static final byte SORTED_SET_SINGLETON = 4;
+  static final byte SORTED_NUMERIC = 5;
+  static final byte SORTED_NUMERIC_SINGLETON = 6;
+  static final int VERSION_START = 3;
+  static final int VERSION_CURRENT = VERSION_START;
+  // metadata maps (just file pointers and minimal stuff)
+  private final Map<String, NumericEntry> numerics = new HashMap<>();
+  private final Map<String, BinaryEntry> binaries = new HashMap<>();
+  private final Map<String, SortedEntry> sorteds = new HashMap<>();
+  private final Map<String, SortedSetEntry> sortedSets = new HashMap<>();
+  private final Map<String, SortedNumericEntry> sortedNumerics = new 
HashMap<>();
+  private final IndexInput data;
+  // ram instances we have already loaded
+  private final Map<String, NumericRawValues> numericInstances = new 
HashMap<>();
+  private final Map<String, BinaryRawValues> binaryInstances = new HashMap<>();
+  private final Map<String, SortedRawValues> sortedInstances = new HashMap<>();
+  private final Map<String, SortedSetRawValues> sortedSetInstances = new 
HashMap<>();
+  private final Map<String, SortedNumericRawValues> sortedNumericInstances = 
new HashMap<>();
+  private final Map<String, FixedBitSet> docsWithFieldInstances = new 
HashMap<>();
+  private final int numEntries;
+  private final int maxDoc;
+  private final int version;
+  private final boolean merging;
+
+  // clone for merge: when merging we don't do any instances.put()s
+  DirectDocValuesProducer(DirectDocValuesProducer original) {
+    assert Thread.holdsLock(original);
+    numerics.putAll(original.numerics);
+    binaries.putAll(original.binaries);
+    sorteds.putAll(original.sorteds);
+    sortedSets.putAll(original.sortedSets);
+    sortedNumerics.putAll(original.sortedNumerics);
+    data = original.data.clone();
+
+    numericInstances.putAll(original.numericInstances);
+    binaryInstances.putAll(original.binaryInstances);
+    sortedInstances.putAll(original.sortedInstances);
+    sortedSetInstances.putAll(original.sortedSetInstances);
+    sortedNumericInstances.putAll(original.sortedNumericInstances);
+    docsWithFieldInstances.putAll(original.docsWithFieldInstances);
+
+    numEntries = original.numEntries;
+    maxDoc = original.maxDoc;
+    version = original.version;
+    merging = true;
+  }
+
+  DirectDocValuesProducer(SegmentReadState state, String dataCodec, String 
dataExtension, String metaCodec, String metaExtension) throws IOException {
+    maxDoc = state.segmentInfo.maxDoc();
+    merging = false;
+    String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, 
state.segmentSuffix, metaExtension);
+    // read in the entries from the metadata file.
+    ChecksumIndexInput in = state.directory.openChecksumInput(metaName, 
state.context);
+    boolean success = false;
+    try {
+      version = CodecUtil.checkIndexHeader(in, metaCodec, VERSION_START, 
VERSION_CURRENT,
+              state.segmentInfo.getId(), state.segmentSuffix);
+      numEntries = readFields(in, state.fieldInfos);
+
+      CodecUtil.checkFooter(in);
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(in);
+      } else {
+        IOUtils.closeWhileHandlingException(in);
+      }
+    }
+
+    String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, 
state.segmentSuffix, dataExtension);
+    data = state.directory.openInput(dataName, state.context);
+    success = false;
+    try {
+      final int version2 = CodecUtil.checkIndexHeader(data, dataCodec, 
VERSION_START, VERSION_CURRENT,
+              state.segmentInfo.getId(), state.segmentSuffix);
+      if (version != version2) {
+        throw new CorruptIndexException("Format versions mismatch: meta=" + 
version + ", data=" + version2, data);
+      }
+
+      // NOTE: data file is too costly to verify checksum against all the 
bytes on open,
+      // but for now we at least verify proper structure of the checksum 
footer: which looks
+      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some 
forms of corruption
+      // such as file truncation.
+      CodecUtil.retrieveChecksum(data);
+
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(this.data);
+      }
+    }
+  }
+
+  private NumericEntry readNumericEntry(IndexInput meta) throws IOException {
+    NumericEntry entry = new NumericEntry();
+    entry.offset = meta.readLong();
+    entry.count = meta.readInt();
+    entry.missingOffset = meta.readLong();
+    if (entry.missingOffset != -1) {
+      entry.missingBytes = meta.readLong();
+    } else {
+      entry.missingBytes = 0;
+    }
+    entry.byteWidth = meta.readByte();
+
+    return entry;
+  }
+
+  private BinaryEntry readBinaryEntry(IndexInput meta) throws IOException {
+    BinaryEntry entry = new BinaryEntry();
+    entry.offset = meta.readLong();
+    entry.numBytes = meta.readInt();
+    entry.count = meta.readInt();
+    entry.missingOffset = meta.readLong();
+    if (entry.missingOffset != -1) {
+      entry.missingBytes = meta.readLong();
+    } else {
+      entry.missingBytes = 0;
+    }
+
+    return entry;
+  }
+
+  private SortedEntry readSortedEntry(IndexInput meta) throws IOException {
+    SortedEntry entry = new SortedEntry();
+    entry.docToOrd = readNumericEntry(meta);
+    entry.values = readBinaryEntry(meta);
+    return entry;
+  }
+
+  private SortedSetEntry readSortedSetEntry(IndexInput meta, boolean 
singleton) throws IOException {
+    SortedSetEntry entry = new SortedSetEntry();
+    if (!singleton) {
+      entry.docToOrdAddress = readNumericEntry(meta);
+    }
+    entry.ords = readNumericEntry(meta);
+    entry.values = readBinaryEntry(meta);
+    return entry;
+  }
+
+  private SortedNumericEntry readSortedNumericEntry(IndexInput meta, boolean 
singleton) throws IOException {
+    SortedNumericEntry entry = new SortedNumericEntry();
+    if (!singleton) {
+      entry.docToAddress = readNumericEntry(meta);
+    }
+    entry.values = readNumericEntry(meta);
+    return entry;
+  }
+
+  private int readFields(IndexInput meta, FieldInfos infos) throws IOException 
{
+    int numEntries = 0;
+    int fieldNumber = meta.readVInt();
+    while (fieldNumber != -1) {
+      numEntries++;
+      FieldInfo info = infos.fieldInfo(fieldNumber);
+      int fieldType = meta.readByte();
+      if (fieldType == NUMBER) {
+        numerics.put(info.name, readNumericEntry(meta));
+      } else if (fieldType == BYTES) {
+        binaries.put(info.name, readBinaryEntry(meta));
+      } else if (fieldType == SORTED) {
+        SortedEntry entry = readSortedEntry(meta);
+        sorteds.put(info.name, entry);
+        binaries.put(info.name, entry.values);
+      } else if (fieldType == SORTED_SET) {
+        SortedSetEntry entry = readSortedSetEntry(meta, false);
+        sortedSets.put(info.name, entry);
+        binaries.put(info.name, entry.values);
+      } else if (fieldType == SORTED_SET_SINGLETON) {
+        SortedSetEntry entry = readSortedSetEntry(meta, true);
+        sortedSets.put(info.name, entry);
+        binaries.put(info.name, entry.values);
+      } else if (fieldType == SORTED_NUMERIC) {
+        SortedNumericEntry entry = readSortedNumericEntry(meta, false);
+        sortedNumerics.put(info.name, entry);
+      } else if (fieldType == SORTED_NUMERIC_SINGLETON) {
+        SortedNumericEntry entry = readSortedNumericEntry(meta, true);
+        sortedNumerics.put(info.name, entry);
+      } else {
+        throw new CorruptIndexException("invalid entry type: " + fieldType + 
", field= " + info.name, meta);
+      }
+      fieldNumber = meta.readVInt();
+    }
+    return numEntries;
+  }
+
+  @Override
+  public String toString() {
+    return getClass().getSimpleName() + "(entries=" + numEntries + ")";
+  }
+
+  @Override
+  public void checkIntegrity() throws IOException {
+    CodecUtil.checksumEntireFile(data.clone());
+  }
+
+  @Override
+  public synchronized NumericDocValues getNumeric(FieldInfo field) throws 
IOException {
+    NumericRawValues instance = numericInstances.get(field.name);
+    NumericEntry ne = numerics.get(field.name);
+    if (instance == null) {
+      // Lazy load
+      instance = loadNumeric(ne);
+      if (!merging) {
+        numericInstances.put(field.name, instance);
+      }
+    }
+    return new NumericDocValuesSub(getMissingBits(field, ne.missingOffset, 
ne.missingBytes), instance);
+  }
+
+  private NumericRawValues loadNumeric(NumericEntry entry) throws IOException {
+    IndexInput data = this.data.clone();
+    data.seek(entry.offset + entry.missingBytes);
+    switch (entry.byteWidth) {
+      case 1: {
+        final byte[] values = new byte[entry.count];
+        data.readBytes(values, 0, entry.count);
+        return new NumericRawValues() {
+          @Override
+          public long get(int docID) {
+            return values[docID];
+          }
+        };
+      }
+
+      case 2: {
+        final short[] values = new short[entry.count];
+        for (int i = 0; i < entry.count; i++) {
+          values[i] = data.readShort();
+        }
+        return new NumericRawValues() {
+          @Override
+          public long get(int docID) {
+            return values[docID];
+          }
+        };
+      }
+
+      case 4: {
+        final int[] values = new int[entry.count];
+        for (int i = 0; i < entry.count; i++) {
+          values[i] = data.readInt();
+        }
+        return new NumericRawValues() {
+          @Override
+          public long get(int docID) {
+            return values[docID];
+          }
+        };
+      }
+
+      case 8: {
+        final long[] values = new long[entry.count];
+        for (int i = 0; i < entry.count; i++) {
+          values[i] = data.readLong();
+        }
+        return new NumericRawValues() {
+          @Override
+          public long get(int docID) {
+            return values[docID];
+          }
+        };
+      }
+
+      default:
+        throw new AssertionError();
+    }
+  }
+
+  private synchronized BinaryRawValues getBinaryRawValues(FieldInfo field) 
throws IOException {
+    BinaryRawValues instance = binaryInstances.get(field.name);
+    if (instance == null) {
+      // Lazy load
+      instance = loadBinary(binaries.get(field.name));
+      if (!merging) {
+        binaryInstances.put(field.name, instance);
+      }
+    }
+
+    return new BinaryRawValues(instance.bytes, instance.address);
+  }
+
+  @Override
+  public synchronized BinaryDocValues getBinary(FieldInfo field) throws 
IOException {
+    BinaryEntry be = binaries.get(field.name);
+    Bits docsWithField = getMissingBits(field, be.missingOffset, 
be.missingBytes);
+    BinaryRawValues values = getBinaryRawValues(field);
+    int maxDoc = docsWithField.length();
+    return new BinaryDocValues() {
+      int docID = -1;
+
+      @Override
+      public BytesRef binaryValue() {
+        return values.get(docID);
+      }
+
+      @Override
+      public boolean advanceExact(int target) {
+        docID = target;
+        return docsWithField.get(target);
+      }
+
+      @Override
+      public int docID() {
+        return docID;
+      }
+
+      @Override
+      public int nextDoc() {
+        docID++;
+        while (docID < maxDoc) {
+          if (docsWithField.get(docID)) {
+            return docID;
+          }
+          docID++;
+        }
+        docID = NO_MORE_DOCS;
+        return NO_MORE_DOCS;
+      }
+
+      @Override
+      public int advance(int target) {
+        if (target < docID) {
+          throw new IllegalArgumentException("cannot advance backwards: 
docID=" + docID + " target=" + target);
+        }
+        if (target == NO_MORE_DOCS) {
+          this.docID = NO_MORE_DOCS;
+        } else {
+          this.docID = target - 1;
+          nextDoc();
+        }
+        return docID;
+      }
+
+      @Override
+      public long cost() {
+        return 0;

Review comment:
       Could you make `cost()` return the actual number of documents that have a value, rather than 0?

##########
File path: 
lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesConsumer.java
##########
@@ -0,0 +1,1101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.memory;
+
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.index.BaseTermsEnum;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.EmptyDocValuesProducer;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.ImpactsEnum;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LongsRef;
+
+import java.io.IOException;
+
+import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.BYTES;
+import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.NUMBER;
+import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED;
+import static 
org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED_NUMERIC;
+import static 
org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED_NUMERIC_SINGLETON;
+import static 
org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED_SET;
+import static 
org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED_SET_SINGLETON;
+import static 
org.apache.lucene.codecs.memory.DirectDocValuesProducer.VERSION_CURRENT;
+import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
+/**
+ * Writer for {@link DirectDocValuesFormat}
+ */
+
+class DirectDocValuesConsumer extends DocValuesConsumer {
+  final int maxDoc;
+  IndexOutput data, meta;
+
+  DirectDocValuesConsumer(SegmentWriteState state, String dataCodec, String 
dataExtension, String metaCodec, String metaExtension) throws IOException {
+    maxDoc = state.segmentInfo.maxDoc();
+    boolean success = false;
+    try {
+      String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, 
state.segmentSuffix, dataExtension);
+      data = state.directory.createOutput(dataName, state.context);
+      CodecUtil.writeIndexHeader(data, dataCodec, VERSION_CURRENT, 
state.segmentInfo.getId(), state.segmentSuffix);
+      String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, 
state.segmentSuffix, metaExtension);
+      meta = state.directory.createOutput(metaName, state.context);
+      CodecUtil.writeIndexHeader(meta, metaCodec, VERSION_CURRENT, 
state.segmentInfo.getId(), state.segmentSuffix);
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(this);
+      }
+    }
+  }
+
+  @Override
+  public void addNumericField(FieldInfo field, DocValuesProducer 
valuesProducer) throws IOException {
+    meta.writeVInt(field.number);
+    meta.writeByte(NUMBER);
+    addNumericFieldValues(field, new EmptyDocValuesProducer() {
+      @Override
+      public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws 
IOException {
+        return new SortedNumericDocNullableValues() {
+          final NumericDocValues in = valuesProducer.getNumeric(field);
+          long[] values = LongsRef.EMPTY_LONGS;
+          long[] nullValues = LongsRef.EMPTY_LONGS;
+          int docIDUpto, i, docValueCount;
+
+          @Override
+          public boolean isNextValueNull() {
+            return nullValues[i-1] == 1;
+          }
+
+          @Override
+          public long nextValue() {
+            return values[i++];
+          }
+
+          @Override
+          public int docValueCount() {
+            return docValueCount;
+          }
+
+          @Override
+          public boolean advanceExact(int target) {
+            throw new UnsupportedOperationException();
+          }
+
+          @Override
+          public int docID() {
+            throw new UnsupportedOperationException();
+          }
+
+          @Override
+          public int nextDoc() throws IOException {
+            if (docIDUpto == maxDoc) {
+              return NO_MORE_DOCS;
+            }
+            int docID = in.nextDoc();
+            if (docID == NO_MORE_DOCS) {
+              docID = -1;
+            }
+            docValueCount = 0;
+            nullValues = LongsRef.EMPTY_LONGS;
+            while (docIDUpto <= in.docID() && docIDUpto < maxDoc) {
+              values = ArrayUtil.grow(values, docValueCount + 1);
+              nullValues = ArrayUtil.grow(nullValues, docValueCount + 1);
+              if (docIDUpto++ == in.docID()) {
+                values[docValueCount++] = in.longValue();
+              } else {
+                nullValues[docValueCount++] = 1;
+              }
+            }
+            i = 0;
+            return docID;
+          }
+
+          @Override
+          public int advance(int target) {
+            throw new UnsupportedOperationException();
+          }
+
+          @Override
+          public long cost() {
+            throw new UnsupportedOperationException();
+          }
+        };
+      }
+    });
+  }
+
+  private abstract static class SortedNumericDocNullableValues extends 
SortedNumericDocValues {
+    public boolean isNextValueNull() {
+      return false;
+    }
+  }
+
+  private abstract static class DocNullableValuesIterator extends 
DocIdSetIterator {
+    public abstract boolean isValueNull();
+  }
+
+  private void addNumericFieldValues(FieldInfo field, final DocValuesProducer 
valuesProducer) throws IOException {
+    meta.writeLong(data.getFilePointer());
+    long minValue = Long.MAX_VALUE;
+    long maxValue = Long.MIN_VALUE;
+    boolean missing = false;
+
+    long count = 0;
+    SortedNumericDocNullableValues values = (SortedNumericDocNullableValues) 
valuesProducer.getSortedNumeric(field);
+    for (int docID = values.nextDoc(); docID != DocIdSetIterator.NO_MORE_DOCS; 
docID = values.nextDoc()) {
+      for (int i = 0, docValueCount = values.docValueCount(); i < 
docValueCount; ++i) {
+        long v = values.nextValue();
+        if (values.isNextValueNull()) {
+          missing = true;
+        } else {
+          minValue = Math.min(minValue, v);
+          maxValue = Math.max(maxValue, v);
+        }
+        count++;
+        if (count >= DirectDocValuesFormat.MAX_SORTED_SET_ORDS) {
+          throw new IllegalArgumentException("DocValuesField \"" + field.name 
+ "\" is too large, must be <= " + DirectDocValuesFormat.MAX_SORTED_SET_ORDS + 
" values/total ords");
+        }
+      }
+    }
+
+    meta.writeInt((int) count);
+
+    if (missing) {
+      long start = data.getFilePointer();
+      writeMissingBitset(new DocNullableValuesIterator() {
+        final SortedNumericDocNullableValues values = 
(SortedNumericDocNullableValues) valuesProducer.getSortedNumeric(field);
+        int docID = values.nextDoc();
+        int i;
+        int docValueCount = values.docValueCount();
+        boolean isValueMissing = false;
+
+        @Override
+        public boolean isValueNull() {
+          return isValueMissing;
+        }
+
+        @Override
+        public int docID() {
+          throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public int nextDoc() throws IOException {
+          isValueMissing = false;
+          if (i < docValueCount) {
+            i++;
+          } else {
+            docID = values.nextDoc();
+            if (docID == NO_MORE_DOCS) return NO_MORE_DOCS;
+            i = 1;
+            docValueCount = values.docValueCount();
+          }
+          values.nextValue();
+          if (values.isNextValueNull()) {
+            isValueMissing = true;
+          }
+          return docID;
+        }
+
+        @Override
+        public int advance(int target) {
+          throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public long cost() {
+          throw new UnsupportedOperationException();
+        }
+      });
+
+      meta.writeLong(start);
+      meta.writeLong(data.getFilePointer() - start);
+    } else {
+      meta.writeLong(-1L);
+    }
+
+    byte byteWidth;
+    if (minValue >= Byte.MIN_VALUE && maxValue <= Byte.MAX_VALUE) {
+      byteWidth = 1;
+    } else if (minValue >= Short.MIN_VALUE && maxValue <= Short.MAX_VALUE) {
+      byteWidth = 2;
+    } else if (minValue >= Integer.MIN_VALUE && maxValue <= Integer.MAX_VALUE) 
{
+      byteWidth = 4;
+    } else {
+      byteWidth = 8;
+    }
+    meta.writeByte(byteWidth);
+
+    values = (SortedNumericDocNullableValues) 
valuesProducer.getSortedNumeric(field);
+    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc 
= values.nextDoc()) {
+      for (int i = 0, docValueCount = values.docValueCount(); i < 
docValueCount; ++i) {
+        long v = values.nextValue();
+        if (values.isNextValueNull()) {
+          v = 0;
+        }
+
+        switch (byteWidth) {
+          case 1:
+            data.writeByte((byte) v);
+            break;
+          case 2:
+            data.writeShort((short) v);
+            break;
+          case 4:
+            data.writeInt((int) v);
+            break;
+          case 8:
+            data.writeLong(v);
+            break;
+        }
+      }
+    }
+  }
+
+  @Override
+  public void close() throws IOException {
+    boolean success = false;
+    try {
+      if (meta != null) {
+        meta.writeVInt(-1); // write EOF marker
+        CodecUtil.writeFooter(meta); // write checksum
+      }
+      if (data != null) {
+        CodecUtil.writeFooter(data);
+      }
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(data, meta);
+      } else {
+        IOUtils.closeWhileHandlingException(data, meta);
+      }
+      data = meta = null;
+    }
+  }
+
+  @Override
+  public void addBinaryField(FieldInfo field, final DocValuesProducer 
valuesProducer) throws IOException {
+    meta.writeVInt(field.number);
+    meta.writeByte(BYTES);
+    addBinaryFieldValues(field, new EmptyDocValuesProducer() {
+      @Override
+      public SortedSetDocValues getSortedSet(FieldInfo field) throws 
IOException {
+        return new SortedSetDocValues() {
+          final BinaryDocValues values = valuesProducer.getBinary(field);
+
+          @Override
+          public long nextOrd() {
+            return 0;
+          }
+
+          @Override
+          public BytesRef lookupOrd(long ord) throws IOException {
+            if (ord > values.docID()) {
+              values.nextDoc();
+            }
+            BytesRef result;
+            if (ord == values.docID()) {
+              result = values.binaryValue();
+            } else {
+              result = new BytesRef();
+              result.bytes = null;
+            }
+            return result;
+          }
+
+          @Override
+          public long getValueCount() {
+            return maxDoc;
+          }
+
+          @Override
+          public boolean advanceExact(int target) {
+            return false;
+          }
+
+          @Override
+          public int docID() {
+            return values.docID();
+          }
+
+          @Override
+          public int nextDoc() {
+            return 0;
+          }
+
+          @Override
+          public int advance(int target) {
+            return 0;
+          }
+
+          @Override
+          public long cost() {
+            return 0;

Review comment:
       Could you throw `UnsupportedOperationException` from these methods instead of providing these default implementations?

##########
File path: 
lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java
##########
@@ -0,0 +1,910 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.memory;
+
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.index.*;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.IOUtils;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Reader for {@link DirectDocValuesFormat}
+ */
+
+class DirectDocValuesProducer extends DocValuesProducer {
+  static final byte NUMBER = 0;
+  static final byte BYTES = 1;
+  static final byte SORTED = 2;
+  static final byte SORTED_SET = 3;
+  static final byte SORTED_SET_SINGLETON = 4;
+  static final byte SORTED_NUMERIC = 5;
+  static final byte SORTED_NUMERIC_SINGLETON = 6;
+  static final int VERSION_START = 3;
+  static final int VERSION_CURRENT = VERSION_START;
+  // metadata maps (just file pointers and minimal stuff)
+  private final Map<String, NumericEntry> numerics = new HashMap<>();
+  private final Map<String, BinaryEntry> binaries = new HashMap<>();
+  private final Map<String, SortedEntry> sorteds = new HashMap<>();
+  private final Map<String, SortedSetEntry> sortedSets = new HashMap<>();
+  private final Map<String, SortedNumericEntry> sortedNumerics = new 
HashMap<>();
+  private final IndexInput data;
+  // ram instances we have already loaded
+  private final Map<String, NumericRawValues> numericInstances = new 
HashMap<>();
+  private final Map<String, BinaryRawValues> binaryInstances = new HashMap<>();
+  private final Map<String, SortedRawValues> sortedInstances = new HashMap<>();
+  private final Map<String, SortedSetRawValues> sortedSetInstances = new 
HashMap<>();
+  private final Map<String, SortedNumericRawValues> sortedNumericInstances = 
new HashMap<>();
+  private final Map<String, FixedBitSet> docsWithFieldInstances = new 
HashMap<>();
+  private final int numEntries;
+  private final int maxDoc;
+  private final int version;
+  private final boolean merging;
+
+  // clone for merge: when merging we don't do any instances.put()s
+  DirectDocValuesProducer(DirectDocValuesProducer original) {
+    assert Thread.holdsLock(original);
+    numerics.putAll(original.numerics);
+    binaries.putAll(original.binaries);
+    sorteds.putAll(original.sorteds);
+    sortedSets.putAll(original.sortedSets);
+    sortedNumerics.putAll(original.sortedNumerics);
+    data = original.data.clone();
+
+    numericInstances.putAll(original.numericInstances);
+    binaryInstances.putAll(original.binaryInstances);
+    sortedInstances.putAll(original.sortedInstances);
+    sortedSetInstances.putAll(original.sortedSetInstances);
+    sortedNumericInstances.putAll(original.sortedNumericInstances);
+    docsWithFieldInstances.putAll(original.docsWithFieldInstances);
+
+    numEntries = original.numEntries;
+    maxDoc = original.maxDoc;
+    version = original.version;
+    merging = true;
+  }
+
  /**
   * Opens the doc-values producer for a segment: fully reads and
   * checksum-verifies the metadata file, then opens the data file and
   * validates its header and footer structure.
   *
   * @throws CorruptIndexException if the meta and data file versions disagree,
   *     or an entry in the metadata is invalid
   * @throws IOException if reading either file fails
   */
  DirectDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
    maxDoc = state.segmentInfo.maxDoc();
    merging = false;
    String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
    // read in the entries from the metadata file.
    ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
    boolean success = false;
    try {
      version = CodecUtil.checkIndexHeader(in, metaCodec, VERSION_START, VERSION_CURRENT,
              state.segmentInfo.getId(), state.segmentSuffix);
      numEntries = readFields(in, state.fieldInfos);

      // The meta file is small, so verify its full checksum on open.
      CodecUtil.checkFooter(in);
      success = true;
    } finally {
      // On failure, close without masking the original exception.
      if (success) {
        IOUtils.close(in);
      } else {
        IOUtils.closeWhileHandlingException(in);
      }
    }

    String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
    data = state.directory.openInput(dataName, state.context);
    success = false;
    try {
      final int version2 = CodecUtil.checkIndexHeader(data, dataCodec, VERSION_START, VERSION_CURRENT,
              state.segmentInfo.getId(), state.segmentSuffix);
      if (version != version2) {
        throw new CorruptIndexException("Format versions mismatch: meta=" + version + ", data=" + version2, data);
      }

      // NOTE: data file is too costly to verify checksum against all the bytes on open,
      // but for now we at least verify proper structure of the checksum footer: which looks
      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
      // such as file truncation.
      CodecUtil.retrieveChecksum(data);

      success = true;
    } finally {
      // data stays open on success; it is the long-lived input for this producer.
      if (!success) {
        IOUtils.closeWhileHandlingException(this.data);
      }
    }
  }
+
+  private NumericEntry readNumericEntry(IndexInput meta) throws IOException {
+    NumericEntry entry = new NumericEntry();
+    entry.offset = meta.readLong();
+    entry.count = meta.readInt();
+    entry.missingOffset = meta.readLong();
+    if (entry.missingOffset != -1) {
+      entry.missingBytes = meta.readLong();
+    } else {
+      entry.missingBytes = 0;
+    }
+    entry.byteWidth = meta.readByte();
+
+    return entry;
+  }
+
+  private BinaryEntry readBinaryEntry(IndexInput meta) throws IOException {
+    BinaryEntry entry = new BinaryEntry();
+    entry.offset = meta.readLong();
+    entry.numBytes = meta.readInt();
+    entry.count = meta.readInt();
+    entry.missingOffset = meta.readLong();
+    if (entry.missingOffset != -1) {
+      entry.missingBytes = meta.readLong();
+    } else {
+      entry.missingBytes = 0;
+    }
+
+    return entry;
+  }
+
+  private SortedEntry readSortedEntry(IndexInput meta) throws IOException {
+    SortedEntry entry = new SortedEntry();
+    entry.docToOrd = readNumericEntry(meta);
+    entry.values = readBinaryEntry(meta);
+    return entry;
+  }
+
+  private SortedSetEntry readSortedSetEntry(IndexInput meta, boolean 
singleton) throws IOException {
+    SortedSetEntry entry = new SortedSetEntry();
+    if (!singleton) {
+      entry.docToOrdAddress = readNumericEntry(meta);
+    }
+    entry.ords = readNumericEntry(meta);
+    entry.values = readBinaryEntry(meta);
+    return entry;
+  }
+
+  private SortedNumericEntry readSortedNumericEntry(IndexInput meta, boolean 
singleton) throws IOException {
+    SortedNumericEntry entry = new SortedNumericEntry();
+    if (!singleton) {
+      entry.docToAddress = readNumericEntry(meta);
+    }
+    entry.values = readNumericEntry(meta);
+    return entry;
+  }
+
+  private int readFields(IndexInput meta, FieldInfos infos) throws IOException 
{
+    int numEntries = 0;
+    int fieldNumber = meta.readVInt();
+    while (fieldNumber != -1) {
+      numEntries++;
+      FieldInfo info = infos.fieldInfo(fieldNumber);
+      int fieldType = meta.readByte();
+      if (fieldType == NUMBER) {
+        numerics.put(info.name, readNumericEntry(meta));
+      } else if (fieldType == BYTES) {
+        binaries.put(info.name, readBinaryEntry(meta));
+      } else if (fieldType == SORTED) {
+        SortedEntry entry = readSortedEntry(meta);
+        sorteds.put(info.name, entry);
+        binaries.put(info.name, entry.values);
+      } else if (fieldType == SORTED_SET) {
+        SortedSetEntry entry = readSortedSetEntry(meta, false);
+        sortedSets.put(info.name, entry);
+        binaries.put(info.name, entry.values);
+      } else if (fieldType == SORTED_SET_SINGLETON) {
+        SortedSetEntry entry = readSortedSetEntry(meta, true);
+        sortedSets.put(info.name, entry);
+        binaries.put(info.name, entry.values);
+      } else if (fieldType == SORTED_NUMERIC) {
+        SortedNumericEntry entry = readSortedNumericEntry(meta, false);
+        sortedNumerics.put(info.name, entry);
+      } else if (fieldType == SORTED_NUMERIC_SINGLETON) {
+        SortedNumericEntry entry = readSortedNumericEntry(meta, true);
+        sortedNumerics.put(info.name, entry);
+      } else {
+        throw new CorruptIndexException("invalid entry type: " + fieldType + 
", field= " + info.name, meta);
+      }
+      fieldNumber = meta.readVInt();
+    }
+    return numEntries;
+  }
+
+  @Override
+  public String toString() {
+    return getClass().getSimpleName() + "(entries=" + numEntries + ")";
+  }
+
  /**
   * Verifies the checksum of the entire data file. Operates on a clone so
   * the shared {@code data} input's file pointer is left untouched.
   */
  @Override
  public void checkIntegrity() throws IOException {
    CodecUtil.checksumEntireFile(data.clone());
  }
+
  /**
   * Returns a {@link NumericDocValues} view of {@code field}, lazily loading
   * the raw values into RAM on first access. The loaded instance is cached
   * unless this producer is a merge-time clone ({@code merging == true}).
   */
  @Override
  public synchronized NumericDocValues getNumeric(FieldInfo field) throws IOException {
    NumericRawValues instance = numericInstances.get(field.name);
    NumericEntry ne = numerics.get(field.name);
    if (instance == null) {
      // Lazy load
      instance = loadNumeric(ne);
      if (!merging) {
        numericInstances.put(field.name, instance);
      }
    }
    // Wrap the raw values with the missing-docs bitset for iteration.
    return new NumericDocValuesSub(getMissingBits(field, ne.missingOffset, ne.missingBytes), instance);
  }
+
  /**
   * Loads all numeric values for {@code entry} into a RAM array, choosing the
   * Java primitive array matching the on-disk byte width (1, 2, 4 or 8).
   * Reads from a clone of {@code data} so the shared input is not
   * repositioned.
   */
  private NumericRawValues loadNumeric(NumericEntry entry) throws IOException {
    IndexInput data = this.data.clone();
    // The values start directly after the (optional) missing-docs bitset.
    data.seek(entry.offset + entry.missingBytes);
    switch (entry.byteWidth) {
      case 1: {
        // Single-byte values can be read in one bulk call.
        final byte[] values = new byte[entry.count];
        data.readBytes(values, 0, entry.count);
        return new NumericRawValues() {
          @Override
          public long get(int docID) {
            return values[docID];
          }
        };
      }

      case 2: {
        final short[] values = new short[entry.count];
        for (int i = 0; i < entry.count; i++) {
          values[i] = data.readShort();
        }
        return new NumericRawValues() {
          @Override
          public long get(int docID) {
            return values[docID];
          }
        };
      }

      case 4: {
        final int[] values = new int[entry.count];
        for (int i = 0; i < entry.count; i++) {
          values[i] = data.readInt();
        }
        return new NumericRawValues() {
          @Override
          public long get(int docID) {
            return values[docID];
          }
        };
      }

      case 8: {
        final long[] values = new long[entry.count];
        for (int i = 0; i < entry.count; i++) {
          values[i] = data.readLong();
        }
        return new NumericRawValues() {
          @Override
          public long get(int docID) {
            return values[docID];
          }
        };
      }

      default:
        // The writer only ever produces widths 1, 2, 4 or 8; the meta file
        // was already checksum-verified, so any other width is a code bug.
        throw new AssertionError();
    }
  }
+
  /**
   * Returns the raw concatenated bytes and address table for {@code field},
   * lazily loading them on first access (cached unless merging). A fresh
   * {@link BinaryRawValues} wrapper around the shared arrays is returned on
   * every call.
   */
  private synchronized BinaryRawValues getBinaryRawValues(FieldInfo field) throws IOException {
    BinaryRawValues instance = binaryInstances.get(field.name);
    if (instance == null) {
      // Lazy load
      instance = loadBinary(binaries.get(field.name));
      if (!merging) {
        binaryInstances.put(field.name, instance);
      }
    }

    return new BinaryRawValues(instance.bytes, instance.address);
  }
+
+  @Override
+  public synchronized BinaryDocValues getBinary(FieldInfo field) throws 
IOException {
+    BinaryEntry be = binaries.get(field.name);
+    Bits docsWithField = getMissingBits(field, be.missingOffset, 
be.missingBytes);
+    BinaryRawValues values = getBinaryRawValues(field);
+    int maxDoc = docsWithField.length();
+    return new BinaryDocValues() {
+      int docID = -1;
+
+      @Override
+      public BytesRef binaryValue() {
+        return values.get(docID);
+      }
+
+      @Override
+      public boolean advanceExact(int target) {
+        docID = target;
+        return docsWithField.get(target);
+      }
+
+      @Override
+      public int docID() {
+        return docID;
+      }
+
+      @Override
+      public int nextDoc() {
+        docID++;
+        while (docID < maxDoc) {
+          if (docsWithField.get(docID)) {
+            return docID;
+          }
+          docID++;
+        }
+        docID = NO_MORE_DOCS;
+        return NO_MORE_DOCS;
+      }
+
+      @Override
+      public int advance(int target) {
+        if (target < docID) {
+          throw new IllegalArgumentException("cannot advance backwards: 
docID=" + docID + " target=" + target);
+        }
+        if (target == NO_MORE_DOCS) {
+          this.docID = NO_MORE_DOCS;
+        } else {
+          this.docID = target - 1;
+          nextDoc();
+        }
+        return docID;
+      }
+
+      @Override
+      public long cost() {
+        return 0;
+      }
+    };
+  }
+
+  private BinaryRawValues loadBinary(BinaryEntry entry) throws IOException {
+    IndexInput data = this.data.clone();
+    data.seek(entry.offset);
+    final byte[] bytes = new byte[entry.numBytes];
+    data.readBytes(bytes, 0, entry.numBytes);
+    data.seek(entry.offset + entry.numBytes + entry.missingBytes);
+
+    final int[] address = new int[entry.count + 1];
+    for (int i = 0; i < entry.count; i++) {
+      address[i] = data.readInt();
+    }
+
+    address[entry.count] = data.readInt();
+    return new BinaryRawValues(bytes, address);
+  }
+
  /**
   * Returns a {@link SortedDocValues} view of {@code field}. The doc-to-ord
   * map is lazily loaded under the producer's lock (cached unless merging);
   * the term bytes come from the shared binary cache via
   * {@code getBinaryRawValues}.
   */
  @Override
  public SortedDocValues getSorted(FieldInfo field) throws IOException {
    final SortedEntry entry = sorteds.get(field.name);
    SortedRawValues instance;
    synchronized (this) {
      instance = sortedInstances.get(field.name);
      if (instance == null) {
        // Lazy load
        instance = loadSorted(field);
        if (!merging) {
          sortedInstances.put(field.name, instance);
        }
      }
    }
    return new SortedDocValuesSub(instance.docToOrd, getBinaryRawValues(field), entry.values.count, maxDoc);
  }
+
+  private SortedRawValues loadSorted(FieldInfo field) throws IOException {
+    final SortedEntry entry = sorteds.get(field.name);
+    final NumericRawValues docToOrd = loadNumeric(entry.docToOrd);
+    final SortedRawValues values = new SortedRawValues();
+    values.docToOrd = docToOrd;
+    return values;
+  }
+
+  @Override
+  public synchronized SortedNumericDocValues getSortedNumeric(FieldInfo field) 
throws IOException {
+    SortedNumericRawValues instance = sortedNumericInstances.get(field.name);
+    final SortedNumericEntry entry = sortedNumerics.get(field.name);
+    if (instance == null) {
+      // Lazy load
+      instance = loadSortedNumeric(entry);
+      if (!merging) {
+        sortedNumericInstances.put(field.name, instance);
+      }
+    }
+
+    if (entry.docToAddress == null) {
+      final Bits docsWithField = getMissingBits(field, 
entry.values.missingOffset, entry.values.missingBytes);
+      return DocValues.singleton(new NumericDocValuesSub(docsWithField, 
instance.values));
+    } else {
+      final NumericRawValues docToAddress = instance.docToAddress;
+      final NumericRawValues values = instance.values;
+
+      return new SortedNumericDocValues() {
+        int valueStart;
+        int valueLimit;
+        int docID = -1;
+        int upto;
+
+        private void setDocument(int doc) {
+          valueStart = (int) docToAddress.get(doc);
+          valueLimit = (int) docToAddress.get(doc + 1);
+        }
+
+        @Override
+        public long nextValue() {
+          return values.get(valueStart + upto++);
+        }
+
+        @Override
+        public int docValueCount() {
+          return valueLimit - valueStart;
+        }
+
+        @Override
+        public boolean advanceExact(int target) {
+          docID = target;
+          setDocument(docID);
+          upto = 0;
+          return docValueCount() != 0;
+        }
+
+        @Override
+        public int docID() {
+          return docID;
+        }
+
+        @Override
+        public int nextDoc() {
+          assert docID != NO_MORE_DOCS;
+          while (true) {
+            docID++;
+            if (docID == maxDoc) {
+              docID = NO_MORE_DOCS;
+              break;
+            }
+            setDocument(docID);
+            if (docValueCount() != 0) {
+              break;
+            }
+          }
+          upto = 0;
+          return docID;
+        }
+
+        @Override
+        public int advance(int target) {
+          if (target < docID) {
+            throw new IllegalArgumentException("cannot advance backwards: 
docID=" + docID + " target=" + target);
+          }
+          if (target >= maxDoc) {
+            docID = NO_MORE_DOCS;
+          } else {
+            docID = target - 1;
+            nextDoc();
+          }
+          return docID;
+        }
+
+        @Override
+        public long cost() {
+          return 0;

Review comment:
       Same as above: please throw `UnsupportedOperationException` from these 
unused methods instead of returning placeholder values such as 0.

##########
File path: 
lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java
##########
@@ -0,0 +1,910 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.memory;
+
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.index.*;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.IOUtils;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Reader for {@link DirectDocValuesFormat}
+ */
+
+class DirectDocValuesProducer extends DocValuesProducer {
+  static final byte NUMBER = 0;
+  static final byte BYTES = 1;
+  static final byte SORTED = 2;
+  static final byte SORTED_SET = 3;
+  static final byte SORTED_SET_SINGLETON = 4;
+  static final byte SORTED_NUMERIC = 5;
+  static final byte SORTED_NUMERIC_SINGLETON = 6;
+  static final int VERSION_START = 3;
+  static final int VERSION_CURRENT = VERSION_START;
+  // metadata maps (just file pointers and minimal stuff)
+  private final Map<String, NumericEntry> numerics = new HashMap<>();
+  private final Map<String, BinaryEntry> binaries = new HashMap<>();
+  private final Map<String, SortedEntry> sorteds = new HashMap<>();
+  private final Map<String, SortedSetEntry> sortedSets = new HashMap<>();
+  private final Map<String, SortedNumericEntry> sortedNumerics = new 
HashMap<>();
+  private final IndexInput data;
+  // ram instances we have already loaded
+  private final Map<String, NumericRawValues> numericInstances = new 
HashMap<>();
+  private final Map<String, BinaryRawValues> binaryInstances = new HashMap<>();
+  private final Map<String, SortedRawValues> sortedInstances = new HashMap<>();
+  private final Map<String, SortedSetRawValues> sortedSetInstances = new 
HashMap<>();
+  private final Map<String, SortedNumericRawValues> sortedNumericInstances = 
new HashMap<>();
+  private final Map<String, FixedBitSet> docsWithFieldInstances = new 
HashMap<>();
+  private final int numEntries;
+  private final int maxDoc;
+  private final int version;
+  private final boolean merging;
+
+  // clone for merge: when merging we don't do any instances.put()s
+  DirectDocValuesProducer(DirectDocValuesProducer original) {
+    assert Thread.holdsLock(original);
+    numerics.putAll(original.numerics);
+    binaries.putAll(original.binaries);
+    sorteds.putAll(original.sorteds);
+    sortedSets.putAll(original.sortedSets);
+    sortedNumerics.putAll(original.sortedNumerics);
+    data = original.data.clone();
+
+    numericInstances.putAll(original.numericInstances);
+    binaryInstances.putAll(original.binaryInstances);
+    sortedInstances.putAll(original.sortedInstances);
+    sortedSetInstances.putAll(original.sortedSetInstances);
+    sortedNumericInstances.putAll(original.sortedNumericInstances);
+    docsWithFieldInstances.putAll(original.docsWithFieldInstances);
+
+    numEntries = original.numEntries;
+    maxDoc = original.maxDoc;
+    version = original.version;
+    merging = true;
+  }
+
+  DirectDocValuesProducer(SegmentReadState state, String dataCodec, String 
dataExtension, String metaCodec, String metaExtension) throws IOException {
+    maxDoc = state.segmentInfo.maxDoc();
+    merging = false;
+    String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, 
state.segmentSuffix, metaExtension);
+    // read in the entries from the metadata file.
+    ChecksumIndexInput in = state.directory.openChecksumInput(metaName, 
state.context);
+    boolean success = false;
+    try {
+      version = CodecUtil.checkIndexHeader(in, metaCodec, VERSION_START, 
VERSION_CURRENT,
+              state.segmentInfo.getId(), state.segmentSuffix);
+      numEntries = readFields(in, state.fieldInfos);
+
+      CodecUtil.checkFooter(in);
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(in);
+      } else {
+        IOUtils.closeWhileHandlingException(in);
+      }
+    }
+
+    String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, 
state.segmentSuffix, dataExtension);
+    data = state.directory.openInput(dataName, state.context);
+    success = false;
+    try {
+      final int version2 = CodecUtil.checkIndexHeader(data, dataCodec, 
VERSION_START, VERSION_CURRENT,
+              state.segmentInfo.getId(), state.segmentSuffix);
+      if (version != version2) {
+        throw new CorruptIndexException("Format versions mismatch: meta=" + 
version + ", data=" + version2, data);
+      }
+
+      // NOTE: data file is too costly to verify checksum against all the 
bytes on open,
+      // but for now we at least verify proper structure of the checksum 
footer: which looks
+      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some 
forms of corruption
+      // such as file truncation.
+      CodecUtil.retrieveChecksum(data);
+
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(this.data);
+      }
+    }
+  }
+
+  private NumericEntry readNumericEntry(IndexInput meta) throws IOException {
+    NumericEntry entry = new NumericEntry();
+    entry.offset = meta.readLong();
+    entry.count = meta.readInt();
+    entry.missingOffset = meta.readLong();
+    if (entry.missingOffset != -1) {
+      entry.missingBytes = meta.readLong();
+    } else {
+      entry.missingBytes = 0;
+    }
+    entry.byteWidth = meta.readByte();
+
+    return entry;
+  }
+
+  private BinaryEntry readBinaryEntry(IndexInput meta) throws IOException {
+    BinaryEntry entry = new BinaryEntry();
+    entry.offset = meta.readLong();
+    entry.numBytes = meta.readInt();
+    entry.count = meta.readInt();
+    entry.missingOffset = meta.readLong();
+    if (entry.missingOffset != -1) {
+      entry.missingBytes = meta.readLong();
+    } else {
+      entry.missingBytes = 0;
+    }
+
+    return entry;
+  }
+
+  private SortedEntry readSortedEntry(IndexInput meta) throws IOException {
+    SortedEntry entry = new SortedEntry();
+    entry.docToOrd = readNumericEntry(meta);
+    entry.values = readBinaryEntry(meta);
+    return entry;
+  }
+
+  private SortedSetEntry readSortedSetEntry(IndexInput meta, boolean 
singleton) throws IOException {
+    SortedSetEntry entry = new SortedSetEntry();
+    if (!singleton) {
+      entry.docToOrdAddress = readNumericEntry(meta);
+    }
+    entry.ords = readNumericEntry(meta);
+    entry.values = readBinaryEntry(meta);
+    return entry;
+  }
+
+  private SortedNumericEntry readSortedNumericEntry(IndexInput meta, boolean 
singleton) throws IOException {
+    SortedNumericEntry entry = new SortedNumericEntry();
+    if (!singleton) {
+      entry.docToAddress = readNumericEntry(meta);
+    }
+    entry.values = readNumericEntry(meta);
+    return entry;
+  }
+
+  private int readFields(IndexInput meta, FieldInfos infos) throws IOException 
{
+    int numEntries = 0;
+    int fieldNumber = meta.readVInt();
+    while (fieldNumber != -1) {
+      numEntries++;
+      FieldInfo info = infos.fieldInfo(fieldNumber);
+      int fieldType = meta.readByte();
+      if (fieldType == NUMBER) {
+        numerics.put(info.name, readNumericEntry(meta));
+      } else if (fieldType == BYTES) {
+        binaries.put(info.name, readBinaryEntry(meta));
+      } else if (fieldType == SORTED) {
+        SortedEntry entry = readSortedEntry(meta);
+        sorteds.put(info.name, entry);
+        binaries.put(info.name, entry.values);
+      } else if (fieldType == SORTED_SET) {
+        SortedSetEntry entry = readSortedSetEntry(meta, false);
+        sortedSets.put(info.name, entry);
+        binaries.put(info.name, entry.values);
+      } else if (fieldType == SORTED_SET_SINGLETON) {
+        SortedSetEntry entry = readSortedSetEntry(meta, true);
+        sortedSets.put(info.name, entry);
+        binaries.put(info.name, entry.values);
+      } else if (fieldType == SORTED_NUMERIC) {
+        SortedNumericEntry entry = readSortedNumericEntry(meta, false);
+        sortedNumerics.put(info.name, entry);
+      } else if (fieldType == SORTED_NUMERIC_SINGLETON) {
+        SortedNumericEntry entry = readSortedNumericEntry(meta, true);
+        sortedNumerics.put(info.name, entry);
+      } else {
+        throw new CorruptIndexException("invalid entry type: " + fieldType + 
", field= " + info.name, meta);
+      }
+      fieldNumber = meta.readVInt();
+    }
+    return numEntries;
+  }
+
+  @Override
+  public String toString() {
+    return getClass().getSimpleName() + "(entries=" + numEntries + ")";
+  }
+
+  @Override
+  public void checkIntegrity() throws IOException {
+    CodecUtil.checksumEntireFile(data.clone());
+  }
+
+  @Override
+  public synchronized NumericDocValues getNumeric(FieldInfo field) throws 
IOException {
+    NumericRawValues instance = numericInstances.get(field.name);
+    NumericEntry ne = numerics.get(field.name);
+    if (instance == null) {
+      // Lazy load
+      instance = loadNumeric(ne);
+      if (!merging) {
+        numericInstances.put(field.name, instance);
+      }
+    }
+    return new NumericDocValuesSub(getMissingBits(field, ne.missingOffset, 
ne.missingBytes), instance);
+  }
+
+  private NumericRawValues loadNumeric(NumericEntry entry) throws IOException {
+    IndexInput data = this.data.clone();
+    data.seek(entry.offset + entry.missingBytes);
+    switch (entry.byteWidth) {
+      case 1: {
+        final byte[] values = new byte[entry.count];
+        data.readBytes(values, 0, entry.count);
+        return new NumericRawValues() {
+          @Override
+          public long get(int docID) {
+            return values[docID];
+          }
+        };
+      }
+
+      case 2: {
+        final short[] values = new short[entry.count];
+        for (int i = 0; i < entry.count; i++) {
+          values[i] = data.readShort();
+        }
+        return new NumericRawValues() {
+          @Override
+          public long get(int docID) {
+            return values[docID];
+          }
+        };
+      }
+
+      case 4: {
+        final int[] values = new int[entry.count];
+        for (int i = 0; i < entry.count; i++) {
+          values[i] = data.readInt();
+        }
+        return new NumericRawValues() {
+          @Override
+          public long get(int docID) {
+            return values[docID];
+          }
+        };
+      }
+
+      case 8: {
+        final long[] values = new long[entry.count];
+        for (int i = 0; i < entry.count; i++) {
+          values[i] = data.readLong();
+        }
+        return new NumericRawValues() {
+          @Override
+          public long get(int docID) {
+            return values[docID];
+          }
+        };
+      }
+
+      default:
+        throw new AssertionError();
+    }
+  }
+
+  private synchronized BinaryRawValues getBinaryRawValues(FieldInfo field) 
throws IOException {
+    BinaryRawValues instance = binaryInstances.get(field.name);
+    if (instance == null) {
+      // Lazy load
+      instance = loadBinary(binaries.get(field.name));
+      if (!merging) {
+        binaryInstances.put(field.name, instance);
+      }
+    }
+
+    return new BinaryRawValues(instance.bytes, instance.address);
+  }
+
+  @Override
+  public synchronized BinaryDocValues getBinary(FieldInfo field) throws 
IOException {
+    BinaryEntry be = binaries.get(field.name);
+    Bits docsWithField = getMissingBits(field, be.missingOffset, 
be.missingBytes);
+    BinaryRawValues values = getBinaryRawValues(field);
+    int maxDoc = docsWithField.length();
+    return new BinaryDocValues() {
+      int docID = -1;
+
+      @Override
+      public BytesRef binaryValue() {
+        return values.get(docID);
+      }
+
+      @Override
+      public boolean advanceExact(int target) {
+        docID = target;
+        return docsWithField.get(target);
+      }
+
+      @Override
+      public int docID() {
+        return docID;
+      }
+
+      @Override
+      public int nextDoc() {
+        docID++;
+        while (docID < maxDoc) {
+          if (docsWithField.get(docID)) {
+            return docID;
+          }
+          docID++;
+        }
+        docID = NO_MORE_DOCS;
+        return NO_MORE_DOCS;
+      }
+
+      @Override
+      public int advance(int target) {
+        if (target < docID) {
+          throw new IllegalArgumentException("cannot advance backwards: 
docID=" + docID + " target=" + target);
+        }
+        if (target == NO_MORE_DOCS) {
+          this.docID = NO_MORE_DOCS;
+        } else {
+          this.docID = target - 1;
+          nextDoc();
+        }
+        return docID;
+      }
+
+      @Override
+      public long cost() {
+        return 0;
+      }
+    };
+  }
+
+  private BinaryRawValues loadBinary(BinaryEntry entry) throws IOException {
+    IndexInput data = this.data.clone();
+    data.seek(entry.offset);
+    final byte[] bytes = new byte[entry.numBytes];
+    data.readBytes(bytes, 0, entry.numBytes);
+    data.seek(entry.offset + entry.numBytes + entry.missingBytes);
+
+    final int[] address = new int[entry.count + 1];
+    for (int i = 0; i < entry.count; i++) {
+      address[i] = data.readInt();
+    }
+
+    address[entry.count] = data.readInt();
+    return new BinaryRawValues(bytes, address);
+  }
+
+  @Override
+  public SortedDocValues getSorted(FieldInfo field) throws IOException {
+    final SortedEntry entry = sorteds.get(field.name);
+    SortedRawValues instance;
+    synchronized (this) {
+      instance = sortedInstances.get(field.name);
+      if (instance == null) {
+        // Lazy load
+        instance = loadSorted(field);
+        if (!merging) {
+          sortedInstances.put(field.name, instance);
+        }
+      }
+    }
+    return new SortedDocValuesSub(instance.docToOrd, 
getBinaryRawValues(field), entry.values.count, maxDoc);
+  }
+
+  private SortedRawValues loadSorted(FieldInfo field) throws IOException {
+    final SortedEntry entry = sorteds.get(field.name);
+    final NumericRawValues docToOrd = loadNumeric(entry.docToOrd);
+    final SortedRawValues values = new SortedRawValues();
+    values.docToOrd = docToOrd;
+    return values;
+  }
+
+  @Override
+  public synchronized SortedNumericDocValues getSortedNumeric(FieldInfo field) 
throws IOException {
+    SortedNumericRawValues instance = sortedNumericInstances.get(field.name);
+    final SortedNumericEntry entry = sortedNumerics.get(field.name);
+    if (instance == null) {
+      // Lazy load
+      instance = loadSortedNumeric(entry);
+      if (!merging) {
+        sortedNumericInstances.put(field.name, instance);
+      }
+    }
+
+    if (entry.docToAddress == null) {
+      final Bits docsWithField = getMissingBits(field, 
entry.values.missingOffset, entry.values.missingBytes);
+      return DocValues.singleton(new NumericDocValuesSub(docsWithField, 
instance.values));
+    } else {
+      final NumericRawValues docToAddress = instance.docToAddress;
+      final NumericRawValues values = instance.values;
+
+      return new SortedNumericDocValues() {
+        int valueStart;
+        int valueLimit;
+        int docID = -1;
+        int upto;
+
+        private void setDocument(int doc) {
+          valueStart = (int) docToAddress.get(doc);
+          valueLimit = (int) docToAddress.get(doc + 1);
+        }
+
+        @Override
+        public long nextValue() {
+          return values.get(valueStart + upto++);
+        }
+
+        @Override
+        public int docValueCount() {
+          return valueLimit - valueStart;
+        }
+
+        @Override
+        public boolean advanceExact(int target) {
+          docID = target;
+          setDocument(docID);
+          upto = 0;
+          return docValueCount() != 0;
+        }
+
+        @Override
+        public int docID() {
+          return docID;
+        }
+
+        @Override
+        public int nextDoc() {
+          assert docID != NO_MORE_DOCS;
+          while (true) {
+            docID++;
+            if (docID == maxDoc) {
+              docID = NO_MORE_DOCS;
+              break;
+            }
+            setDocument(docID);
+            if (docValueCount() != 0) {
+              break;
+            }
+          }
+          upto = 0;
+          return docID;
+        }
+
+        @Override
+        public int advance(int target) {
+          if (target < docID) {
+            throw new IllegalArgumentException("cannot advance backwards: 
docID=" + docID + " target=" + target);
+          }
+          if (target >= maxDoc) {
+            docID = NO_MORE_DOCS;
+          } else {
+            docID = target - 1;
+            nextDoc();
+          }
+          return docID;
+        }
+
+        @Override
+        public long cost() {
+          return 0;
+        }
+      };
+    }
+  }
+
+  private SortedNumericRawValues loadSortedNumeric(SortedNumericEntry entry) 
throws IOException {
+    SortedNumericRawValues instance = new SortedNumericRawValues();
+    if (entry.docToAddress != null) {
+      instance.docToAddress = loadNumeric(entry.docToAddress);
+    }
+    instance.values = loadNumeric(entry.values);
+    return instance;
+  }
+
+  @Override
+  public synchronized SortedSetDocValues getSortedSet(FieldInfo field) throws 
IOException {
+    SortedSetRawValues instance = sortedSetInstances.get(field.name);
+    final SortedSetEntry entry = sortedSets.get(field.name);
+    if (instance == null) {
+      // Lazy load
+      instance = loadSortedSet(entry);
+      if (!merging) {
+        sortedSetInstances.put(field.name, instance);
+      }
+    }
+
+    if (instance.docToOrdAddress == null) {
+      return DocValues.singleton(new SortedDocValuesSub(instance.ords, 
getBinaryRawValues(field), entry.values.count, maxDoc));
+    } else {
+      final NumericRawValues docToOrdAddress = instance.docToOrdAddress;
+      final NumericRawValues ords = instance.ords;
+      final BinaryRawValues values = getBinaryRawValues(field);
+
+      return new SortedSetDocValues() {
+        int ordUpto;
+        int ordLimit;
+        private int docID = -1;
+        private long ord;
+
+        private long innerNextOrd() {
+          if (ordUpto == ordLimit) {
+            return NO_MORE_ORDS;
+          } else {
+            return ords.get(ordUpto++);
+          }
+        }
+
+        private void setDocument(int docID) {
+          ordUpto = (int) docToOrdAddress.get(docID);
+          ordLimit = (int) docToOrdAddress.get(docID + 1);
+        }
+
+        @Override
+        public long nextOrd() {
+          long result = ord;
+          if (result != NO_MORE_ORDS) {
+            ord = innerNextOrd();
+          }
+          return result;
+        }
+
+        @Override
+        public BytesRef lookupOrd(long ord) {
+          return values.get((int) ord);
+        }
+
+        @Override
+        public long getValueCount() {
+          return entry.values.count;
+        }
+
+        @Override
+        public boolean advanceExact(int target) {
+          docID = target;
+          setDocument(docID);
+          ord = innerNextOrd();
+          return ord != NO_MORE_ORDS;
+        }
+
+        @Override
+        public int docID() {
+          return docID;
+        }
+
+        @Override
+        public int nextDoc() {
+          assert docID != NO_MORE_DOCS;
+          docID++;
+          while (docID < maxDoc) {
+            setDocument(docID);
+            ord = innerNextOrd();
+            if (ord != NO_MORE_ORDS) {
+              return docID;
+            }
+            docID++;
+          }
+          docID = NO_MORE_DOCS;
+          return NO_MORE_DOCS;
+        }
+
+        @Override
+        public int advance(int target) {
+          if (target < docID) {
+            throw new IllegalArgumentException("cannot advance backwards: 
docID=" + docID + " target=" + target);
+          }
+          if (target >= maxDoc) {
+            this.docID = NO_MORE_DOCS;
+          } else {
+            this.docID = target - 1;
+            nextDoc();
+          }
+          return docID;
+        }
+
+        @Override
+        public long cost() {
+          return 0;
+        }
+      };
+    }
+  }
+
+  private SortedSetRawValues loadSortedSet(SortedSetEntry entry) throws 
IOException {
+    SortedSetRawValues instance = new SortedSetRawValues();
+    if (entry.docToOrdAddress != null) {
+      instance.docToOrdAddress = loadNumeric(entry.docToOrdAddress);
+    }
+    instance.ords = loadNumeric(entry.ords);
+    return instance;
+  }
+
+  private Bits getMissingBits(FieldInfo field, final long offset, final long 
length) throws IOException {
+    if (offset == -1) {
+      return new Bits.MatchAllBits(maxDoc);
+    } else {
+      FixedBitSet instance;
+      synchronized (this) {
+        instance = docsWithFieldInstances.get(field.name);
+        if (instance == null) {
+          IndexInput data = this.data.clone();
+          data.seek(offset);
+          assert length % 8 == 0;
+          long[] bits = new long[(int) length >> 3];
+          for (int i = 0; i < bits.length; i++) {
+            bits[i] = data.readLong();
+          }
+          instance = new FixedBitSet(bits, maxDoc);
+          if (!merging) {
+            docsWithFieldInstances.put(field.name, instance);
+          }
+        }
+      }
+      return instance;
+    }
+  }
+
  // Returns a producer dedicated to merging. NOTE(review): presumably the copy
  // constructor sets the `merging` flag, which the get* methods use to skip
  // caching loaded instances — confirm against the constructor (outside this view).
  @Override
  public synchronized DocValuesProducer getMergeInstance() {
    return new DirectDocValuesProducer(this);
  }
+
  // Closes the underlying data file; in-memory caches are simply released
  // along with this producer.
  @Override
  public void close() throws IOException {
    data.close();
  }
+
+  private static class NumericDocValuesSub extends NumericDocValues {
+    final Bits docsWithField;
+    final NumericRawValues values;
+    final int maxDoc;
+    int docID = -1;
+    long value;
+
+    public NumericDocValuesSub(Bits docsWithField, NumericRawValues values) {
+      this.docsWithField = docsWithField;
+      this.values = values;
+      this.maxDoc = docsWithField.length();
+    }
+
+    @Override
+    public int docID() {
+      return docID;
+    }
+
+    @Override
+    public int nextDoc() {
+      docID++;
+      while (docID < maxDoc) {
+        value = values.get(docID);
+        if (value != 0 || docsWithField.get(docID)) {
+          return docID;
+        }
+        docID++;
+      }
+      docID = NO_MORE_DOCS;
+      return NO_MORE_DOCS;
+    }
+
+    @Override
+    public int advance(int target) {
+      assert target >= docID : "target=" + target + " docID=" + docID;
+      if (target == NO_MORE_DOCS) {
+        this.docID = NO_MORE_DOCS;
+      } else {
+        this.docID = target - 1;
+        nextDoc();
+      }
+      return docID;
+    }
+
+    @Override
+    public boolean advanceExact(int target) throws IOException {
+      docID = target;
+      value = values.get(docID);
+      return value != 0 || docsWithField.get(docID);
+    }
+
+    @Override
+    public long cost() {
+      return 0;
+    }
+
+    @Override
+    public long longValue() {
+      return value;
+    }
+
+    @Override
+    public String toString() {
+      return "NumericDocValuesSub(" + values + ")";
+    }
+  }
+
+  private static class SortedDocValuesSub extends SortedDocValues {
+    final NumericRawValues numericRawValues;
+    final BinaryRawValues binaryRawValues;
+    final int count;
+    final int maxDoc;
+    int docID = -1;
+    int ord;
+
+    public SortedDocValuesSub(NumericRawValues numericRawValues, 
BinaryRawValues binaryRawValues, int count, int maxDoc) {
+      this.numericRawValues = numericRawValues;
+      this.binaryRawValues = binaryRawValues;
+      this.count = count;
+      this.maxDoc = maxDoc;
+    }
+
+    @Override
+    public int docID() {
+      return docID;
+    }
+
+    @Override
+    public int nextDoc() {
+      assert docID != NO_MORE_DOCS;
+      docID++;
+      while (docID < maxDoc) {
+        ord = (int) numericRawValues.get(docID);
+        if (ord != -1) {
+          return docID;
+        }
+        docID++;
+      }
+      docID = NO_MORE_DOCS;
+      return NO_MORE_DOCS;
+    }
+
+    @Override
+    public int advance(int target) {
+      if (target < docID) {
+        throw new IllegalArgumentException("cannot advance backwards: docID=" 
+ docID + " target=" + target);
+      }
+      if (target >= maxDoc) {
+        this.docID = NO_MORE_DOCS;
+      } else {
+        this.docID = target - 1;
+        nextDoc();
+      }
+      return docID;
+    }
+
+    @Override
+    public boolean advanceExact(int target) throws IOException {
+      docID = target;
+      ord = (int) numericRawValues.get(docID);
+      return ord != -1;
+    }
+
+    @Override
+    public long cost() {
+      return 0;

Review comment:
       Can you return the actual number of docs that have a value?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to