Ah, crap! Thanks, Robert!
On Mon, Oct 10, 2011 at 8:05 PM, <[email protected]> wrote: > Author: rmuir > Date: Mon Oct 10 18:05:18 2011 > New Revision: 1181104 > > URL: http://svn.apache.org/viewvc?rev=1181104&view=rev > Log: > LUCENE-3186: svn add > > Added: > > lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java > (with props) > > lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java > (with props) > > Added: > lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java?rev=1181104&view=auto > ============================================================================== > --- > lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java > (added) > +++ > lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java > Mon Oct 10 18:05:18 2011 > @@ -0,0 +1,204 @@ > +package org.apache.lucene.index.values; > + > +/** > + * Licensed to the Apache Software Foundation (ASF) under one or more > + * contributor license agreements. See the NOTICE file distributed with > + * this work for additional information regarding copyright ownership. > + * The ASF licenses this file to You under the Apache License, Version 2.0 > + * (the "License"); you may not use this file except in compliance with > + * the License. You may obtain a copy of the License at > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. 
> + */ > +import java.util.HashMap; > +import java.util.Map; > + > +/** > + * Type promoter that promotes {@link IndexDocValues} during merge based on > + * their {@link ValueType} and {@link #getValueSize()} > + * > + * @lucene.internal > + */ > +public class TypePromoter { > + > + private final static Map<Integer, ValueType> FLAGS_MAP = new > HashMap<Integer, ValueType>(); > + private static final TypePromoter IDENTITY_PROMOTER = new > IdentityTypePromoter(); > + public static final int VAR_TYPE_VALUE_SIZE = -1; > + > + private static final int IS_INT = 1 << 0; > + private static final int IS_BYTE = 1 << 1; > + private static final int IS_FLOAT = 1 << 2; > + /* VAR & FIXED == VAR */ > + private static final int IS_VAR = 1 << 3; > + private static final int IS_FIXED = 1 << 3 | 1 << 4; > + /* if we have FIXED & FIXED with different size we promote to VAR */ > + private static final int PROMOTE_TO_VAR_SIZE_MASK = ~(1 << 3); > + /* STRAIGHT & DEREF == STRAIGHT (dense values win) */ > + private static final int IS_STRAIGHT = 1 << 5; > + private static final int IS_DEREF = 1 << 5 | 1 << 6; > + private static final int IS_SORTED = 1 << 7; > + /* more bits wins (int16 & int32 == int32) */ > + private static final int IS_8_BIT = 1 << 8 | 1 << 9 | 1 << 10 | 1 << 11; > + private static final int IS_16_BIT = 1 << 9 | 1 << 10 | 1 << 11; > + private static final int IS_32_BIT = 1 << 10 | 1 << 11; > + private static final int IS_64_BIT = 1 << 11; > + > + private final ValueType type; > + private final int flags; > + private final int valueSize; > + > + /** > + * Returns a positive value size if this {@link TypePromoter} represents a > + * fixed variant, otherwise <code>-1</code> > + * > + * @return a positive value size if this {@link TypePromoter} represents a > + * fixed variant, otherwise <code>-1</code> > + */ > + public int getValueSize() { > + return valueSize; > + } > + > + static { > + for (ValueType type : ValueType.values()) { > + TypePromoter create = create(type, 
VAR_TYPE_VALUE_SIZE); > + FLAGS_MAP.put(create.flags, type); > + } > + } > + > + /** > + * Creates a new {@link TypePromoter} > + * > + * @param type > + * the {@link ValueType} this promoter represents > + * @param flags > + * the promoters flags > + * @param valueSize > + * the value size if {@link #IS_FIXED} or <code>-1</code> > otherwise. > + */ > + protected TypePromoter(ValueType type, int flags, int valueSize) { > + this.type = type; > + this.flags = flags; > + this.valueSize = valueSize; > + } > + > + /** > + * Creates a new promoted {@link TypePromoter} based on this and the given > + * {@link TypePromoter} or <code>null</code> iff the {@link TypePromoter} > + * aren't compatible. > + * > + * @param promoter > + * the incoming promoter > + * @return a new promoted {@link TypePromoter} based on this and the given > + * {@link TypePromoter} or <code>null</code> iff the > + * {@link TypePromoter} aren't compatible. > + */ > + public TypePromoter promote(TypePromoter promoter) { > + > + int promotedFlags = promoter.flags & this.flags; > + TypePromoter promoted = create(FLAGS_MAP.get(promotedFlags), valueSize); > + if (promoted == null) { > + return promoted; > + } > + if ((promoted.flags & IS_BYTE) != 0 && (promoted.flags & IS_FIXED) == > IS_FIXED) { > + if (this.valueSize == promoter.valueSize) { > + return promoted; > + } > + return create(FLAGS_MAP.get(promoted.flags & PROMOTE_TO_VAR_SIZE_MASK), > + VAR_TYPE_VALUE_SIZE); > + } > + return promoted; > + > + } > + > + /** > + * Returns the {@link ValueType} of this {@link TypePromoter} > + * > + * @return the {@link ValueType} of this {@link TypePromoter} > + */ > + public ValueType type() { > + return type; > + } > + > + @Override > + public String toString() { > + return "TypePromoter [type=" + type + ", sizeInBytes=" + valueSize + "]"; > + } > + > + /** > + * Creates a new {@link TypePromoter} for the given type and size per > value. 
> + * > + * @param type > + * the {@link ValueType} to create the promoter for > + * @param valueSize > + * the size per value in bytes or <code>-1</code> iff the types > have > + * variable length. > + * @return a new {@link TypePromoter} > + */ > + public static TypePromoter create(ValueType type, int valueSize) { > + if (type == null) { > + return null; > + } > + switch (type) { > + case BYTES_FIXED_DEREF: > + return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_DEREF, > valueSize); > + case BYTES_FIXED_SORTED: > + return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_SORTED, > valueSize); > + case BYTES_FIXED_STRAIGHT: > + return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_STRAIGHT, > valueSize); > + case BYTES_VAR_DEREF: > + return new TypePromoter(type, IS_BYTE | IS_VAR | IS_DEREF, > VAR_TYPE_VALUE_SIZE); > + case BYTES_VAR_SORTED: > + return new TypePromoter(type, IS_BYTE | IS_VAR | IS_SORTED, > VAR_TYPE_VALUE_SIZE); > + case BYTES_VAR_STRAIGHT: > + return new TypePromoter(type, IS_BYTE | IS_VAR | IS_STRAIGHT, > VAR_TYPE_VALUE_SIZE); > + case FIXED_INTS_16: > + return new TypePromoter(type, > + IS_INT | IS_FIXED | IS_STRAIGHT | IS_16_BIT, valueSize); > + case FIXED_INTS_32: > + return new TypePromoter(type, > + IS_INT | IS_FIXED | IS_STRAIGHT | IS_32_BIT, valueSize); > + case FIXED_INTS_64: > + return new TypePromoter(type, > + IS_INT | IS_FIXED | IS_STRAIGHT | IS_64_BIT, valueSize); > + case FIXED_INTS_8: > + return new TypePromoter(type, IS_INT | IS_FIXED | IS_STRAIGHT | > IS_8_BIT, > + valueSize); > + case FLOAT_32: > + return new TypePromoter(type, IS_FLOAT | IS_FIXED | IS_STRAIGHT > + | IS_32_BIT, valueSize); > + case FLOAT_64: > + return new TypePromoter(type, IS_FLOAT | IS_FIXED | IS_STRAIGHT > + | IS_64_BIT, valueSize); > + case VAR_INTS: > + return new TypePromoter(type, IS_INT | IS_VAR | IS_STRAIGHT, > VAR_TYPE_VALUE_SIZE); > + default: > + throw new IllegalStateException(); > + } > + } > + > + /** > + * Returns a {@link TypePromoter} that 
always promotes to the type > provided to > + * {@link #promote(TypePromoter)} > + */ > + public static TypePromoter getIdentityPromoter() { > + return IDENTITY_PROMOTER; > + } > + > + private static class IdentityTypePromoter extends TypePromoter { > + > + public IdentityTypePromoter() { > + super(null, 0, -1); > + } > + > + @Override > + public TypePromoter promote(TypePromoter promoter) { > + return promoter; > + } > + } > +} > \ No newline at end of file > > Added: > lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java?rev=1181104&view=auto > ============================================================================== > --- > lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java > (added) > +++ > lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java > Mon Oct 10 18:05:18 2011 > @@ -0,0 +1,313 @@ > +package org.apache.lucene.index.values; > + > +import java.io.IOException; > +import java.util.EnumSet; > +import java.util.Random; > + > +import org.apache.lucene.analysis.MockAnalyzer; > +import org.apache.lucene.document.Document; > +import org.apache.lucene.document.Field; > +import org.apache.lucene.document.IndexDocValuesField; > +import org.apache.lucene.document.TextField; > +import org.apache.lucene.index.CorruptIndexException; > +import org.apache.lucene.index.IndexReader; > +import org.apache.lucene.index.IndexReader.ReaderContext; > +import org.apache.lucene.index.IndexWriter; > +import org.apache.lucene.index.IndexWriterConfig; > +import org.apache.lucene.index.NoMergePolicy; > +import org.apache.lucene.index.codecs.CodecProvider; > +import org.apache.lucene.index.values.IndexDocValues.Source; > +import org.apache.lucene.store.Directory; > +import org.apache.lucene.util.BytesRef; > +import org.apache.lucene.util.LuceneTestCase; > 
+import org.junit.Before; > + > +/** > + * Licensed to the Apache Software Foundation (ASF) under one or more > + * contributor license agreements. See the NOTICE file distributed with this > + * work for additional information regarding copyright ownership. The ASF > + * licenses this file to You under the Apache License, Version 2.0 (the > + * "License"); you may not use this file except in compliance with the > License. > + * You may obtain a copy of the License at > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT > + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the > + * License for the specific language governing permissions and limitations > under > + * the License. > + */ > +public class TestTypePromotion extends LuceneTestCase { > + @Before > + public void setUp() throws Exception { > + super.setUp(); > + assumeFalse("cannot work with preflex codec", CodecProvider.getDefault() > + .getDefaultFieldCodec().equals("PreFlex")); > + } > + > + private static EnumSet<ValueType> INTEGERS = EnumSet.of(ValueType.VAR_INTS, > + ValueType.FIXED_INTS_16, ValueType.FIXED_INTS_32, > + ValueType.FIXED_INTS_64, ValueType.FIXED_INTS_8); > + > + private static EnumSet<ValueType> FLOATS = EnumSet.of(ValueType.FLOAT_32, > + ValueType.FLOAT_64); > + > + private static EnumSet<ValueType> UNSORTED_BYTES = EnumSet.of( > + ValueType.BYTES_FIXED_DEREF, ValueType.BYTES_FIXED_STRAIGHT, > + ValueType.BYTES_VAR_STRAIGHT, ValueType.BYTES_VAR_DEREF); > + > + private static EnumSet<ValueType> SORTED_BYTES = EnumSet.of( > + ValueType.BYTES_FIXED_SORTED, ValueType.BYTES_VAR_SORTED); > + > + public ValueType randomValueType(EnumSet<ValueType> typeEnum, Random > random) { > + ValueType[] array = typeEnum.toArray(new ValueType[0]); > + return array[random.nextInt(array.length)]; > + } > + > + private static 
enum TestType { > + Int, Float, Byte > + } > + > + private void runTest(EnumSet<ValueType> types, TestType type) > + throws CorruptIndexException, IOException { > + Directory dir = newDirectory(); > + IndexWriter writer = new IndexWriter(dir, > + newIndexWriterConfig(TEST_VERSION_CURRENT, new > MockAnalyzer(random))); > + int num_1 = atLeast(200); > + int num_2 = atLeast(200); > + int num_3 = atLeast(200); > + long[] values = new long[num_1 + num_2 + num_3]; > + index(writer, new IndexDocValuesField("promote"), > + randomValueType(types, random), values, 0, num_1); > + writer.commit(); > + > + index(writer, new IndexDocValuesField("promote"), > + randomValueType(types, random), values, num_1, num_2); > + writer.commit(); > + > + if (random.nextInt(4) == 0) { > + // once in a while use addIndexes > + writer.optimize(); > + > + Directory dir_2 = newDirectory() ; > + IndexWriter writer_2 = new IndexWriter(dir_2, > + newIndexWriterConfig(TEST_VERSION_CURRENT, new > MockAnalyzer(random))); > + index(writer_2, new IndexDocValuesField("promote"), > + randomValueType(types, random), values, num_1 + num_2, num_3); > + writer_2.commit(); > + writer_2.close(); > + if (random.nextBoolean()) { > + writer.addIndexes(dir_2); > + } else { > + // do a real merge here > + IndexReader open = IndexReader.open(dir_2); > + writer.addIndexes(open); > + open.close(); > + } > + dir_2.close(); > + } else { > + index(writer, new IndexDocValuesField("promote"), > + randomValueType(types, random), values, num_1 + num_2, num_3); > + } > + > + writer.optimize(); > + writer.close(); > + assertValues(type, dir, values); > + dir.close(); > + } > + > + private void assertValues(TestType type, Directory dir, long[] values) > + throws CorruptIndexException, IOException { > + IndexReader reader = IndexReader.open(dir); > + assertTrue(reader.isOptimized()); > + ReaderContext topReaderContext = reader.getTopReaderContext(); > + ReaderContext[] children = topReaderContext.children(); > + IndexDocValues 
docValues = children[0].reader.docValues("promote"); > + assertEquals(1, children.length); > + Source directSource = docValues.getDirectSource(); > + for (int i = 0; i < values.length; i++) { > + int id = Integer.parseInt(reader.document(i).get("id")); > + String msg = "id: " + id + " doc: " + i; > + switch (type) { > + case Byte: > + BytesRef bytes = directSource.getBytes(i, new BytesRef()); > + long value = 0; > + switch(bytes.length) { > + case 1: > + value = bytes.bytes[bytes.offset]; > + break; > + case 2: > + value = bytes.asShort(); > + break; > + case 4: > + value = bytes.asInt(); > + break; > + case 8: > + value = bytes.asLong(); > + break; > + > + default: > + fail(msg + " bytessize: " + bytes.length); > + } > + > + assertEquals(msg + " byteSize: " + bytes.length, values[id], value); > + break; > + case Float: > + assertEquals(msg, values[id], > Double.doubleToRawLongBits(directSource.getFloat(i))); > + break; > + case Int: > + assertEquals(msg, values[id], directSource.getInt(i)); > + default: > + break; > + } > + > + } > + docValues.close(); > + reader.close(); > + } > + > + public void index(IndexWriter writer, IndexDocValuesField valField, > + ValueType valueType, long[] values, int offset, int num) > + throws CorruptIndexException, IOException { > + BytesRef ref = new BytesRef(new byte[] { 1, 2, 3, 4 }); > + for (int i = offset; i < offset + num; i++) { > + Document doc = new Document(); > + doc.add(new Field("id", i + "", TextField.TYPE_STORED)); > + switch (valueType) { > + case VAR_INTS: > + values[i] = random.nextInt(); > + valField.setInt(values[i]); > + break; > + case FIXED_INTS_16: > + values[i] = random.nextInt(Short.MAX_VALUE); > + valField.setInt((short) values[i], true); > + break; > + case FIXED_INTS_32: > + values[i] = random.nextInt(); > + valField.setInt((int) values[i], true); > + break; > + case FIXED_INTS_64: > + values[i] = random.nextLong(); > + valField.setInt(values[i], true); > + break; > + case FLOAT_64: > + double nextDouble 
= random.nextDouble(); > + values[i] = Double.doubleToRawLongBits(nextDouble); > + valField.setFloat(nextDouble); > + break; > + case FLOAT_32: > + final float nextFloat = random.nextFloat(); > + values[i] = Double.doubleToRawLongBits(nextFloat); > + valField.setFloat(nextFloat); > + break; > + case FIXED_INTS_8: > + values[i] = (byte) i; > + valField.setInt((byte)values[i], true); > + break; > + case BYTES_FIXED_DEREF: > + case BYTES_FIXED_SORTED: > + case BYTES_FIXED_STRAIGHT: > + values[i] = random.nextLong(); > + ref.copy(values[i]); > + valField.setBytes(ref, valueType); > + break; > + case BYTES_VAR_DEREF: > + case BYTES_VAR_SORTED: > + case BYTES_VAR_STRAIGHT: > + if (random.nextBoolean()) { > + ref.copy(random.nextInt()); > + values[i] = ref.asInt(); > + } else { > + ref.copy(random.nextLong()); > + values[i] = ref.asLong(); > + } > + valField.setBytes(ref, valueType); > + break; > + > + default: > + fail("unexpected value " + valueType); > + > + } > + doc.add(valField); > + writer.addDocument(doc); > + if (random.nextInt(10) == 0) { > + writer.commit(); > + } > + } > + } > + > + public void testPromoteBytes() throws IOException { > + runTest(UNSORTED_BYTES, TestType.Byte); > + } > + > + public void testSortedPromoteBytes() throws IOException { > + runTest(SORTED_BYTES, TestType.Byte); > + } > + > + public void testPromotInteger() throws IOException { > + runTest(INTEGERS, TestType.Int); > + } > + > + public void testPromotFloatingPoint() throws CorruptIndexException, > + IOException { > + runTest(FLOATS, TestType.Float); > + } > + > + public void testMergeIncompatibleTypes() throws IOException { > + Directory dir = newDirectory(); > + IndexWriterConfig writerConfig = > newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); > + writerConfig.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES); // no > merges until we are done with adding values > + IndexWriter writer = new IndexWriter(dir, writerConfig); > + int num_1 = atLeast(200); > + int num_2 
= atLeast(200); > + long[] values = new long[num_1 + num_2]; > + index(writer, new IndexDocValuesField("promote"), > + randomValueType(INTEGERS, random), values, 0, num_1); > + writer.commit(); > + > + if (random.nextInt(4) == 0) { > + // once in a while use addIndexes > + Directory dir_2 = newDirectory() ; > + IndexWriter writer_2 = new IndexWriter(dir_2, > + newIndexWriterConfig(TEST_VERSION_CURRENT, new > MockAnalyzer(random))); > + index(writer_2, new IndexDocValuesField("promote"), > + randomValueType(random.nextBoolean() ? UNSORTED_BYTES : > SORTED_BYTES, random), values, num_1, num_2); > + writer_2.commit(); > + writer_2.close(); > + if (random.nextBoolean()) { > + writer.addIndexes(dir_2); > + } else { > + // do a real merge here > + IndexReader open = IndexReader.open(dir_2); > + writer.addIndexes(open); > + open.close(); > + } > + dir_2.close(); > + } else { > + index(writer, new IndexDocValuesField("promote"), > + randomValueType(random.nextBoolean() ? UNSORTED_BYTES : > SORTED_BYTES, random), values, num_1, num_2); > + writer.commit(); > + } > + writer.close(); > + writerConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new > MockAnalyzer(random)); > + if (writerConfig.getMergePolicy() instanceof NoMergePolicy) { > + writerConfig.setMergePolicy(newLogMergePolicy()); // make sure we > optimize to one segment (merge everything together) > + } > + writer = new IndexWriter(dir, writerConfig); > + // now optimize > + writer.optimize(); > + writer.close(); > + IndexReader reader = IndexReader.open(dir); > + assertTrue(reader.isOptimized()); > + ReaderContext topReaderContext = reader.getTopReaderContext(); > + ReaderContext[] children = topReaderContext.children(); > + IndexDocValues docValues = children[0].reader.docValues("promote"); > + assertNotNull(docValues); > + assertValues(TestType.Byte, dir, values); > + assertEquals(ValueType.BYTES_VAR_STRAIGHT, docValues.type()); > + reader.close(); > + dir.close(); > + } > + > +} > \ No newline at end of file > 
> > --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
