http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/test/org/apache/orc/impl/TestSchemaEvolution.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/orc/src/test/org/apache/orc/impl/TestSchemaEvolution.java
deleted file mode 100644
index c28af94..0000000
--- a/orc/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ /dev/null
@@ -1,469 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.orc.OrcFile;
-import org.apache.orc.Reader;
-import org.apache.orc.RecordReader;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.Writer;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-
-public class TestSchemaEvolution {
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  Configuration conf;
-  Path testFilePath;
-  FileSystem fs;
-  Path workDir = new Path(System.getProperty("test.tmp.dir",
-      "target" + File.separator + "test" + File.separator + "tmp"));
-
-  @Before
-  public void setup() throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    testFilePath = new Path(workDir, "TestOrcFile." +
-        testCaseName.getMethodName() + ".orc");
-    fs.delete(testFilePath, false);
-  }
-
-  @Test
-  public void testDataTypeConversion1() throws IOException {
-    TypeDescription fileStruct1 = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt())
-        .addField("f2", TypeDescription.createString())
-        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
-    SchemaEvolution same1 = new SchemaEvolution(fileStruct1, null);
-    assertFalse(same1.hasConversion());
-    TypeDescription readerStruct1 = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt())
-        .addField("f2", TypeDescription.createString())
-        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
-    SchemaEvolution both1 = new SchemaEvolution(fileStruct1, readerStruct1, null);
-    assertFalse(both1.hasConversion());
-    TypeDescription readerStruct1diff = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createLong())
-        .addField("f2", TypeDescription.createString())
-        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
-    SchemaEvolution both1diff = new SchemaEvolution(fileStruct1, readerStruct1diff, null);
-    assertTrue(both1diff.hasConversion());
-    TypeDescription readerStruct1diffPrecision = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt())
-        .addField("f2", TypeDescription.createString())
-        .addField("f3", TypeDescription.createDecimal().withPrecision(12).withScale(10));
-    SchemaEvolution both1diffPrecision = new SchemaEvolution(fileStruct1, readerStruct1diffPrecision, null);
-    assertTrue(both1diffPrecision.hasConversion());
-  }
-
-  @Test
-  public void testDataTypeConversion2() throws IOException {
-    TypeDescription fileStruct2 = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createUnion()
-            .addUnionChild(TypeDescription.createByte())
-            .addUnionChild(TypeDescription.createDecimal()
-                .withPrecision(20).withScale(10)))
-        .addField("f2", TypeDescription.createStruct()
-            .addField("f3", TypeDescription.createDate())
-            .addField("f4", TypeDescription.createDouble())
-            .addField("f5", TypeDescription.createBoolean()))
-        .addField("f6", TypeDescription.createChar().withMaxLength(100));
-    SchemaEvolution same2 = new SchemaEvolution(fileStruct2, null);
-    assertFalse(same2.hasConversion());
-    TypeDescription readerStruct2 = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createUnion()
-            .addUnionChild(TypeDescription.createByte())
-            .addUnionChild(TypeDescription.createDecimal()
-                .withPrecision(20).withScale(10)))
-        .addField("f2", TypeDescription.createStruct()
-            .addField("f3", TypeDescription.createDate())
-            .addField("f4", TypeDescription.createDouble())
-            .addField("f5", TypeDescription.createBoolean()))
-        .addField("f6", TypeDescription.createChar().withMaxLength(100));
-    SchemaEvolution both2 = new SchemaEvolution(fileStruct2, readerStruct2, null);
-    assertFalse(both2.hasConversion());
-    TypeDescription readerStruct2diff = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createUnion()
-            .addUnionChild(TypeDescription.createByte())
-            .addUnionChild(TypeDescription.createDecimal()
-                .withPrecision(20).withScale(10)))
-        .addField("f2", TypeDescription.createStruct()
-            .addField("f3", TypeDescription.createDate())
-            .addField("f4", TypeDescription.createDouble())
-            .addField("f5", TypeDescription.createByte()))
-        .addField("f6", TypeDescription.createChar().withMaxLength(100));
-    SchemaEvolution both2diff = new SchemaEvolution(fileStruct2, readerStruct2diff, null);
-    assertTrue(both2diff.hasConversion());
-    TypeDescription readerStruct2diffChar = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createUnion()
-            .addUnionChild(TypeDescription.createByte())
-            .addUnionChild(TypeDescription.createDecimal()
-                .withPrecision(20).withScale(10)))
-        .addField("f2", TypeDescription.createStruct()
-            .addField("f3", TypeDescription.createDate())
-            .addField("f4", TypeDescription.createDouble())
-            .addField("f5", TypeDescription.createBoolean()))
-        .addField("f6", TypeDescription.createChar().withMaxLength(80));
-    SchemaEvolution both2diffChar = new SchemaEvolution(fileStruct2, readerStruct2diffChar, null);
-    assertTrue(both2diffChar.hasConversion());
-  }
-
-  @Test
-  public void testFloatToDoubleEvolution() throws Exception {
-    testFilePath = new Path(workDir, "TestOrcFile." +
-        testCaseName.getMethodName() + ".orc");
-    TypeDescription schema = TypeDescription.createFloat();
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
-            .bufferSize(10000));
-    VectorizedRowBatch batch = new VectorizedRowBatch(1, 1024);
-    DoubleColumnVector dcv = new DoubleColumnVector(1024);
-    batch.cols[0] = dcv;
-    batch.reset();
-    batch.size = 1;
-    dcv.vector[0] = 74.72f;
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    TypeDescription schemaOnRead = TypeDescription.createDouble();
-    RecordReader rows = reader.rows(new Reader.Options().schema(schemaOnRead));
-    batch = schemaOnRead.createRowBatch();
-    rows.nextBatch(batch);
-    assertEquals(74.72, ((DoubleColumnVector) batch.cols[0]).vector[0], 0.00000000001);
-    rows.close();
-  }
-
-  @Test
-  public void testSafePpdEvaluation() throws IOException {
-    TypeDescription fileStruct1 = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt())
-        .addField("f2", TypeDescription.createString())
-        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
-    SchemaEvolution same1 = new SchemaEvolution(fileStruct1, null);
-    assertTrue(same1.isPPDSafeConversion(0));
-    assertFalse(same1.hasConversion());
-    TypeDescription readerStruct1 = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt())
-        .addField("f2", TypeDescription.createString())
-        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
-    SchemaEvolution both1 = new SchemaEvolution(fileStruct1, readerStruct1, null);
-    assertFalse(both1.hasConversion());
-    assertTrue(both1.isPPDSafeConversion(0));
-    assertTrue(both1.isPPDSafeConversion(1));
-    assertTrue(both1.isPPDSafeConversion(2));
-    assertTrue(both1.isPPDSafeConversion(3));
-
-    // int -> long
-    TypeDescription readerStruct1diff = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createLong())
-        .addField("f2", TypeDescription.createString())
-        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
-    SchemaEvolution both1diff = new SchemaEvolution(fileStruct1, readerStruct1diff, null);
-    assertTrue(both1diff.hasConversion());
-    assertFalse(both1diff.isPPDSafeConversion(0));
-    assertTrue(both1diff.isPPDSafeConversion(1));
-    assertTrue(both1diff.isPPDSafeConversion(2));
-    assertTrue(both1diff.isPPDSafeConversion(3));
-
-    // decimal(38,10) -> decimal(12, 10)
-    TypeDescription readerStruct1diffPrecision = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt())
-        .addField("f2", TypeDescription.createString())
-        .addField("f3", TypeDescription.createDecimal().withPrecision(12).withScale(10));
-    SchemaEvolution both1diffPrecision = new SchemaEvolution(fileStruct1, readerStruct1diffPrecision,
-        new boolean[] {true, false, false, true});
-    assertTrue(both1diffPrecision.hasConversion());
-    assertFalse(both1diffPrecision.isPPDSafeConversion(0));
-    assertFalse(both1diffPrecision.isPPDSafeConversion(1)); // column not included
-    assertFalse(both1diffPrecision.isPPDSafeConversion(2)); // column not included
-    assertFalse(both1diffPrecision.isPPDSafeConversion(3));
-
-    // add columns
-    readerStruct1 = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt())
-        .addField("f2", TypeDescription.createString())
-        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10))
-        .addField("f4", TypeDescription.createBoolean());
-    both1 = new SchemaEvolution(fileStruct1, readerStruct1, null);
-    assertTrue(both1.hasConversion());
-    assertFalse(both1.isPPDSafeConversion(0));
-    assertTrue(both1.isPPDSafeConversion(1));
-    assertTrue(both1.isPPDSafeConversion(2));
-    assertTrue(both1.isPPDSafeConversion(3));
-    assertFalse(both1.isPPDSafeConversion(4));
-  }
-
-  @Test
-  public void testSafePpdEvaluationForInts() throws IOException {
-    // byte -> short -> int -> long
-    TypeDescription fileSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createByte());
-    SchemaEvolution schemaEvolution = new SchemaEvolution(fileSchema, null);
-    assertFalse(schemaEvolution.hasConversion());
-
-    // byte -> short
-    TypeDescription readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createShort());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertTrue(schemaEvolution.isPPDSafeConversion(1));
-
-    // byte -> int
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertTrue(schemaEvolution.isPPDSafeConversion(1));
-
-    // byte -> long
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createLong());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertTrue(schemaEvolution.isPPDSafeConversion(1));
-
-    // short -> int -> long
-    fileSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createShort());
-    schemaEvolution = new SchemaEvolution(fileSchema, null);
-    assertFalse(schemaEvolution.hasConversion());
-
-    // unsafe conversion short -> byte
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createByte());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // short -> int
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertTrue(schemaEvolution.isPPDSafeConversion(1));
-
-    // short -> long
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createLong());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertTrue(schemaEvolution.isPPDSafeConversion(1));
-
-    // int -> long
-    fileSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt());
-    schemaEvolution = new SchemaEvolution(fileSchema, null);
-    assertFalse(schemaEvolution.hasConversion());
-
-    // unsafe conversion int -> byte
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createByte());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // unsafe conversion int -> short
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createShort());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // int -> long
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createLong());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertTrue(schemaEvolution.isPPDSafeConversion(1));
-
-    // long
-    fileSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createLong());
-    schemaEvolution = new SchemaEvolution(fileSchema, null);
-    assertTrue(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.hasConversion());
-
-    // unsafe conversion long -> byte
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createByte());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // unsafe conversion long -> short
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createShort());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // unsafe conversion long -> int
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // invalid
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createString());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // invalid
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createFloat());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // invalid
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createTimestamp());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-  }
-
-  @Test
-  public void testSafePpdEvaluationForStrings() throws IOException {
-    TypeDescription fileSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createString());
-    SchemaEvolution schemaEvolution = new SchemaEvolution(fileSchema, null);
-    assertTrue(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.hasConversion());
-
-    // string -> char
-    TypeDescription readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createChar());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // string -> varchar
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createVarchar());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertTrue(schemaEvolution.isPPDSafeConversion(1));
-
-    fileSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createChar());
-    schemaEvolution = new SchemaEvolution(fileSchema, null);
-    assertTrue(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.hasConversion());
-
-    // char -> string
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createString());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // char -> varchar
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createVarchar());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    fileSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createVarchar());
-    schemaEvolution = new SchemaEvolution(fileSchema, null);
-    assertTrue(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.hasConversion());
-
-    // varchar -> string
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createString());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertTrue(schemaEvolution.isPPDSafeConversion(1));
-
-    // varchar -> char
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createChar());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // invalid
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createDecimal());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // invalid
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createDate());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // invalid
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/test/org/apache/orc/impl/TestSerializationUtils.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestSerializationUtils.java b/orc/src/test/org/apache/orc/impl/TestSerializationUtils.java
deleted file mode 100644
index 4a8a0f2..0000000
--- a/orc/src/test/org/apache/orc/impl/TestSerializationUtils.java
+++ /dev/null
@@ -1,201 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.fail;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.InputStream;
-import java.math.BigInteger;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
-
-import org.junit.Test;
-
-import com.google.common.math.LongMath;
-
-public class TestSerializationUtils {
-
-  private InputStream fromBuffer(ByteArrayOutputStream buffer) {
-    return new ByteArrayInputStream(buffer.toByteArray());
-  }
-
-  @Test
-  public void testDoubles() throws Exception {
-    double tolerance = 0.0000000000000001;
-    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-    SerializationUtils utils = new SerializationUtils();
-    utils.writeDouble(buffer, 1343822337.759);
-    assertEquals(1343822337.759, utils.readDouble(fromBuffer(buffer)), tolerance);
-    buffer = new ByteArrayOutputStream();
-    utils.writeDouble(buffer, 0.8);
-    double got = utils.readDouble(fromBuffer(buffer));
-    assertEquals(0.8, got, tolerance);
-  }
-
-  @Test
-  public void testBigIntegers() throws Exception {
-    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(0));
-    assertArrayEquals(new byte[]{0}, buffer.toByteArray());
-    assertEquals(0L,
-        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
-    buffer.reset();
-    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(1));
-    assertArrayEquals(new byte[]{2}, buffer.toByteArray());
-    assertEquals(1L,
-        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
-    buffer.reset();
-    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(-1));
-    assertArrayEquals(new byte[]{1}, buffer.toByteArray());
-    assertEquals(-1L,
-        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
-    buffer.reset();
-    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(50));
-    assertArrayEquals(new byte[]{100}, buffer.toByteArray());
-    assertEquals(50L,
-        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
-    buffer.reset();
-    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(-50));
-    assertArrayEquals(new byte[]{99}, buffer.toByteArray());
-    assertEquals(-50L,
-        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
-    for(int i=-8192; i < 8192; ++i) {
-      buffer.reset();
-      SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(i));
-      assertEquals("compare length for " + i,
-          i >= -64 && i < 64 ? 1 : 2, buffer.size());
-      assertEquals("compare result for " + i,
-          i, SerializationUtils.readBigInteger(fromBuffer(buffer)).intValue());
-    }
-    buffer.reset();
-    SerializationUtils.writeBigInteger(buffer,
-        new BigInteger("123456789abcdef0",16));
-    assertEquals(new BigInteger("123456789abcdef0",16),
-        SerializationUtils.readBigInteger(fromBuffer(buffer)));
-    buffer.reset();
-    SerializationUtils.writeBigInteger(buffer,
-        new BigInteger("-123456789abcdef0",16));
-    assertEquals(new BigInteger("-123456789abcdef0",16),
-        SerializationUtils.readBigInteger(fromBuffer(buffer)));
-    StringBuilder buf = new StringBuilder();
-    for(int i=0; i < 256; ++i) {
-      String num = Integer.toHexString(i);
-      if (num.length() == 1) {
-        buf.append('0');
-      }
-      buf.append(num);
-    }
-    buffer.reset();
-    SerializationUtils.writeBigInteger(buffer,
-        new BigInteger(buf.toString(),16));
-    assertEquals(new BigInteger(buf.toString(),16),
-        SerializationUtils.readBigInteger(fromBuffer(buffer)));
-    buffer.reset();
-    SerializationUtils.writeBigInteger(buffer,
-        new BigInteger("ff000000000000000000000000000000000000000000ff",16));
-    assertEquals(
-        new BigInteger("ff000000000000000000000000000000000000000000ff",16),
-        SerializationUtils.readBigInteger(fromBuffer(buffer)));
-  }
-
-  @Test
-  public void testSubtractionOverflow() {
-    // cross check results with Guava results below
-    SerializationUtils utils = new SerializationUtils();
-    assertEquals(false, utils.isSafeSubtract(22222222222L, Long.MIN_VALUE));
-    assertEquals(false, utils.isSafeSubtract(-22222222222L, Long.MAX_VALUE));
-    assertEquals(false, utils.isSafeSubtract(Long.MIN_VALUE, Long.MAX_VALUE));
-    assertEquals(true, utils.isSafeSubtract(-1553103058346370095L, 6553103058346370095L));
-    assertEquals(true, utils.isSafeSubtract(0, Long.MAX_VALUE));
-    assertEquals(true, utils.isSafeSubtract(Long.MIN_VALUE, 0));
-  }
-
-  @Test
-  public void testSubtractionOverflowGuava() {
-    try {
-      LongMath.checkedSubtract(22222222222L, Long.MIN_VALUE);
-      fail("expected ArithmeticException for overflow");
-    } catch (ArithmeticException ex) {
-      assertEquals(ex.getMessage(), "overflow");
-    }
-
-    try {
-      LongMath.checkedSubtract(-22222222222L, Long.MAX_VALUE);
-      fail("expected ArithmeticException for overflow");
-    } catch (ArithmeticException ex) {
-      assertEquals(ex.getMessage(), "overflow");
-    }
-
-    try {
-      LongMath.checkedSubtract(Long.MIN_VALUE, Long.MAX_VALUE);
-      fail("expected ArithmeticException for overflow");
-    } catch (ArithmeticException ex) {
-      assertEquals(ex.getMessage(), "overflow");
-    }
-
-    assertEquals(-8106206116692740190L,
-        LongMath.checkedSubtract(-1553103058346370095L, 6553103058346370095L));
-    assertEquals(-Long.MAX_VALUE, LongMath.checkedSubtract(0, Long.MAX_VALUE));
-    assertEquals(Long.MIN_VALUE, LongMath.checkedSubtract(Long.MIN_VALUE, 0));
-  }
-
-  @Test
-  public void testRandomFloats() throws Exception {
-    float tolerance = 0.0000000000000001f;
-    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-    SerializationUtils utils = new SerializationUtils();
-    Random rand = new Random();
-    int n = 100_000;
-    float[] expected = new float[n];
-    for (int i = 0; i < n; i++) {
-      float f = rand.nextFloat();
-      expected[i] = f;
-      utils.writeFloat(buffer, f);
-    }
-    InputStream newBuffer = fromBuffer(buffer);
-    for (int i = 0; i < n; i++) {
-      float got = utils.readFloat(newBuffer);
-      assertEquals(expected[i], got, tolerance);
-    }
-  }
-
-  @Test
-  public void testRandomDoubles() throws Exception {
-    double tolerance = 0.0000000000000001;
-    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-    SerializationUtils utils = new SerializationUtils();
-    Random rand = new Random();
-    int n = 100_000;
-    double[] expected = new double[n];
-    for (int i = 0; i < n; i++) {
-      double d = rand.nextDouble();
-      expected[i] = d;
-      utils.writeDouble(buffer, d);
-    }
-    InputStream newBuffer = fromBuffer(buffer);
-    for (int i = 0; i < n; i++) {
-      double got = utils.readDouble(newBuffer);
-      assertEquals(expected[i], got, tolerance);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/test/org/apache/orc/impl/TestStreamName.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestStreamName.java b/orc/src/test/org/apache/orc/impl/TestStreamName.java
deleted file mode 100644
index be58d4c..0000000
--- a/orc/src/test/org/apache/orc/impl/TestStreamName.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import org.apache.orc.OrcProto;
-import org.junit.Test;
-
-import static org.junit.Assert.assertEquals;
-
-public class TestStreamName {
-
-  @Test
-  public void test1() throws Exception {
-    StreamName s1 = new StreamName(3, OrcProto.Stream.Kind.DATA);
-    StreamName s2 = new StreamName(3,
-        OrcProto.Stream.Kind.DICTIONARY_DATA);
-    StreamName s3 = new StreamName(5, OrcProto.Stream.Kind.DATA);
-    StreamName s4 = new StreamName(5,
-        OrcProto.Stream.Kind.DICTIONARY_DATA);
-    StreamName s1p = new StreamName(3, OrcProto.Stream.Kind.DATA);
-    assertEquals(true, s1.equals(s1));
-    assertEquals(false, s1.equals(s2));
-    assertEquals(false, s1.equals(s3));
-    assertEquals(true, s1.equals(s1p));
-    assertEquals(true, s1.compareTo(null) < 0);
-    assertEquals(false, s1.equals(null));
-    assertEquals(true, s1.compareTo(s2) < 0);
-    assertEquals(true, s2.compareTo(s3) < 0);
-    assertEquals(true, s3.compareTo(s4) < 0);
-    assertEquals(true, s4.compareTo(s1p) > 0);
-    assertEquals(0, s1p.compareTo(s1));
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/test/org/apache/orc/impl/TestStringRedBlackTree.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestStringRedBlackTree.java b/orc/src/test/org/apache/orc/impl/TestStringRedBlackTree.java
deleted file mode 100644
index 3d4612c..0000000
--- a/orc/src/test/org/apache/orc/impl/TestStringRedBlackTree.java
+++ /dev/null
@@ -1,234 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import org.apache.hadoop.io.DataOutputBuffer;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.orc.impl.RedBlackTree;
-import org.apache.orc.impl.StringRedBlackTree;
-import org.junit.Test;
-
-import java.io.IOException;
-
-import static junit.framework.Assert.assertEquals;
-
-/**
- * Test the red-black tree with string keys.
- */
-public class TestStringRedBlackTree {
-
-  /**
-   * Checks the red-black tree rules to make sure that we have correctly built
-   * a valid tree.
-   *
-   * Properties:
-   *   1. Red nodes must have black children
-   *   2. Each node must have the same black height on both sides.
-   *
-   * @param node The id of the root of the subtree to check for the red-black
-   *             tree properties.
-   * @return The black-height of the subtree.
-   */
-  private int checkSubtree(RedBlackTree tree, int node, IntWritable count
-                          ) throws IOException {
-    if (node == RedBlackTree.NULL) {
-      return 1;
-    }
-    count.set(count.get() + 1);
-    boolean is_red = tree.isRed(node);
-    int left = tree.getLeft(node);
-    int right = tree.getRight(node);
-    if (is_red) {
-      if (tree.isRed(left)) {
-        printTree(tree, "", tree.root);
-        throw new IllegalStateException("Left node of " + node + " is " + left +
-            " and both are red.");
-      }
-      if (tree.isRed(right)) {
-        printTree(tree, "", tree.root);
-        throw new IllegalStateException("Right node of " + node + " is " +
-            right + " and both are red.");
-      }
-    }
-    int left_depth = checkSubtree(tree, left, count);
-    int right_depth = checkSubtree(tree, right, count);
-    if (left_depth != right_depth) {
-      printTree(tree, "", tree.root);
-      throw new IllegalStateException("Lopsided tree at node " + node +
-          " with depths " + left_depth + " and " + right_depth);
-    }
-    if (is_red) {
-      return left_depth;
-    } else {
-      return left_depth + 1;
-    }
-  }
-
-  /**
-   * Checks the validity of the entire tree. Also ensures that the number of
-   * nodes visited is the same as the size of the set.
-   */
-  void checkTree(RedBlackTree tree) throws IOException {
-    IntWritable count = new IntWritable(0);
-    if (tree.isRed(tree.root)) {
-      printTree(tree, "", tree.root);
-      throw new IllegalStateException("root is red");
-    }
-    checkSubtree(tree, tree.root, count);
-    if (count.get() != tree.size) {
-      printTree(tree, "", tree.root);
-      throw new IllegalStateException("Broken tree! visited= " + count.get() +
-          " size=" + tree.size);
-    }
-  }
-
-  void printTree(RedBlackTree tree, String indent, int node
-                ) throws IOException {
-    if (node == RedBlackTree.NULL) {
-      System.err.println(indent + "NULL");
-    } else {
-      System.err.println(indent + "Node " + node + " color " +
-          (tree.isRed(node) ? "red" : "black"));
-      printTree(tree, indent + "  ", tree.getLeft(node));
-      printTree(tree, indent + "  ", tree.getRight(node));
-    }
-  }
-
-  private static class MyVisitor implements StringRedBlackTree.Visitor {
-    private final String[] words;
-    private final int[] order;
-    private final DataOutputBuffer buffer = new DataOutputBuffer();
-    int current = 0;
-
-    MyVisitor(String[] args, int[] order) {
-      words = args;
-      this.order = order;
-    }
-
-    @Override
-    public void visit(StringRedBlackTree.VisitorContext context
-                     ) throws IOException {
-      String word = context.getText().toString();
-      assertEquals("in word " + current, words[current], word);
-      assertEquals("in word " + current, order[current],
-          context.getOriginalPosition());
-      buffer.reset();
-      context.writeBytes(buffer);
-      assertEquals(word, new String(buffer.getData(),0,buffer.getLength()));
-      current += 1;
-    }
-  }
-
-  void checkContents(StringRedBlackTree tree, int[] order,
-                     String... params
-                    ) throws IOException {
-    tree.visit(new MyVisitor(params, order));
-  }
-
-  StringRedBlackTree buildTree(String... params) throws IOException {
-    StringRedBlackTree result = new StringRedBlackTree(1000);
-    for(String word: params) {
-      result.add(word);
-      checkTree(result);
-    }
-    return result;
-  }
-
-  @Test
-  public void test1() throws Exception {
-    StringRedBlackTree tree = new StringRedBlackTree(5);
-    assertEquals(0, tree.getSizeInBytes());
-    checkTree(tree);
-    assertEquals(0, tree.add("owen"));
-    checkTree(tree);
-    assertEquals(1, tree.add("ashutosh"));
-    checkTree(tree);
-    assertEquals(0, tree.add("owen"));
-    checkTree(tree);
-    assertEquals(2, tree.add("alan"));
-    checkTree(tree);
-    assertEquals(2, tree.add("alan"));
-    checkTree(tree);
-    assertEquals(1, tree.add("ashutosh"));
-    checkTree(tree);
-    assertEquals(3, tree.add("greg"));
-    checkTree(tree);
-    assertEquals(4, tree.add("eric"));
-    checkTree(tree);
-    assertEquals(5, tree.add("arun"));
-    checkTree(tree);
-    assertEquals(6, tree.size());
-    checkTree(tree);
-    assertEquals(6, tree.add("eric14"));
-    checkTree(tree);
-    assertEquals(7, tree.add("o"));
-    checkTree(tree);
-    assertEquals(8, tree.add("ziggy"));
-    checkTree(tree);
-    assertEquals(9, tree.add("z"));
-    checkTree(tree);
-    checkContents(tree, new int[]{2,5,1,4,6,3,7,0,9,8},
-        "alan", "arun", "ashutosh", "eric", "eric14", "greg",
-        "o", "owen", "z", "ziggy");
-    assertEquals(32888, tree.getSizeInBytes());
-    // check that adding greg again bumps the count
-    assertEquals(3, tree.add("greg"));
-    assertEquals(41, tree.getCharacterSize());
-    // add some more strings to test the different branches of the
-    // rebalancing
-    assertEquals(10, tree.add("zak"));
-    checkTree(tree);
-    assertEquals(11, tree.add("eric1"));
-    checkTree(tree);
-    assertEquals(12, tree.add("ash"));
-    checkTree(tree);
-    assertEquals(13, tree.add("harry"));
-    checkTree(tree);
-    assertEquals(14, tree.add("john"));
-    checkTree(tree);
-    tree.clear();
-    checkTree(tree);
-    assertEquals(0, tree.getSizeInBytes());
-    assertEquals(0, tree.getCharacterSize());
-  }
-
-  @Test
-  public void test2() throws Exception {
-    StringRedBlackTree tree =
-      buildTree("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l",
-        "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
-    assertEquals(26, tree.size());
-    checkContents(tree, new int[]{0,1,2, 3,4,5, 6,7,8, 9,10,11, 12,13,14,
-        15,16,17, 18,19,20, 21,22,23, 24,25},
-        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j","k", "l", "m", "n", "o",
-        "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
-  }
-
-  @Test
-  public void test3() throws Exception {
-    StringRedBlackTree tree =
-      buildTree("z", "y", "x", "w", "v", "u", "t", "s", "r", "q", "p", "o", "n",
-        "m", "l", "k", "j", "i", "h", "g", "f", "e", "d", "c", "b", "a");
-    assertEquals(26, tree.size());
-    checkContents(tree, new int[]{25,24,23, 22,21,20, 19,18,17, 16,15,14,
-        13,12,11, 10,9,8, 7,6,5, 4,3,2, 1,0},
-        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
-        "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/test/org/apache/orc/impl/TestZlib.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestZlib.java b/orc/src/test/org/apache/orc/impl/TestZlib.java
deleted file mode 100644
index 327ecfc..0000000
--- a/orc/src/test/org/apache/orc/impl/TestZlib.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import org.apache.orc.CompressionCodec;
-import org.junit.Test;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import static junit.framework.Assert.assertEquals;
-import static junit.framework.Assert.fail;
-
-public class TestZlib {
-
-  @Test
-  public void testNoOverflow() throws Exception {
-    ByteBuffer in = ByteBuffer.allocate(10);
-    ByteBuffer out = ByteBuffer.allocate(10);
-    in.put(new byte[]{1,2,3,4,5,6,7,10});
-    in.flip();
-    CompressionCodec codec = new ZlibCodec();
-    assertEquals(false, codec.compress(in, out, null));
-  }
-
-  @Test
-  public void testCorrupt() throws Exception {
-    ByteBuffer buf = ByteBuffer.allocate(1000);
-    buf.put(new byte[]{127,-128,0,99,98,-1});
-    buf.flip();
-    CompressionCodec codec = new ZlibCodec();
-    ByteBuffer out = ByteBuffer.allocate(1000);
-    try {
-      codec.decompress(buf, out);
-      fail();
-    } catch (IOException ioe) {
-      // EXPECTED
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/test/org/apache/orc/tools/TestFileDump.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/tools/TestFileDump.java b/orc/src/test/org/apache/orc/tools/TestFileDump.java
deleted file mode 100644
index ce3381e..0000000
--- a/orc/src/test/org/apache/orc/tools/TestFileDump.java
+++ /dev/null
@@ -1,486 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.tools;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-
-import java.io.BufferedReader;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.PrintStream;
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.orc.CompressionKind;
-import org.apache.orc.OrcConf;
-import org.apache.orc.OrcFile;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.Writer;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-public class TestFileDump {
-
-  Path workDir = new Path(System.getProperty("test.tmp.dir"));
-  Configuration conf;
-  FileSystem fs;
-  Path testFilePath;
-
-  @Before
-  public void openFileSystem () throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    fs.setWorkingDirectory(workDir);
-    testFilePath = new Path("TestFileDump.testDump.orc");
-    fs.delete(testFilePath, false);
-  }
-
-  static TypeDescription getMyRecordType() {
-    return TypeDescription.createStruct()
-        .addField("i", TypeDescription.createInt())
-        .addField("l", TypeDescription.createLong())
-        .addField("s", TypeDescription.createString());
-  }
-
-  static void appendMyRecord(VectorizedRowBatch batch,
-                             int i,
-                             long l,
-                             String str) {
-    ((LongColumnVector) batch.cols[0]).vector[batch.size] = i;
-    ((LongColumnVector) batch.cols[1]).vector[batch.size] = l;
-    if (str == null) {
-      batch.cols[2].noNulls = false;
-      batch.cols[2].isNull[batch.size] = true;
-    } else {
-      ((BytesColumnVector) batch.cols[2]).setVal(batch.size,
-          str.getBytes());
-    }
-    batch.size += 1;
-  }
-
-  static TypeDescription getAllTypesType() {
-    return TypeDescription.createStruct()
-        .addField("b", TypeDescription.createBoolean())
-        .addField("bt", TypeDescription.createByte())
-        .addField("s", TypeDescription.createShort())
-        .addField("i", TypeDescription.createInt())
-        .addField("l", TypeDescription.createLong())
-        .addField("f", TypeDescription.createFloat())
-        .addField("d", TypeDescription.createDouble())
-        .addField("de", TypeDescription.createDecimal())
-        .addField("t", TypeDescription.createTimestamp())
-        .addField("dt", TypeDescription.createDate())
-        .addField("str", TypeDescription.createString())
-        .addField("c", TypeDescription.createChar().withMaxLength(5))
-        .addField("vc", TypeDescription.createVarchar().withMaxLength(10))
-        .addField("m", TypeDescription.createMap(
-            TypeDescription.createString(),
-            TypeDescription.createString()))
-        .addField("a", TypeDescription.createList(TypeDescription.createInt()))
-        .addField("st", TypeDescription.createStruct()
-            .addField("i", TypeDescription.createInt())
-            .addField("s", TypeDescription.createString()));
-  }
-
-  static void appendAllTypes(VectorizedRowBatch batch,
-                             boolean b,
-                             byte bt,
-                             short s,
-                             int i,
-                             long l,
-                             float f,
-                             double d,
-                             HiveDecimalWritable de,
-                             Timestamp t,
-                             DateWritable dt,
-                             String str,
-                             String c,
-                             String vc,
-                             Map<String, String> m,
-                             List<Integer> a,
-                             int sti,
-                             String sts) {
-    int row = batch.size++;
-    ((LongColumnVector) batch.cols[0]).vector[row] = b ? 1 : 0;
-    ((LongColumnVector) batch.cols[1]).vector[row] = bt;
-    ((LongColumnVector) batch.cols[2]).vector[row] = s;
-    ((LongColumnVector) batch.cols[3]).vector[row] = i;
-    ((LongColumnVector) batch.cols[4]).vector[row] = l;
-    ((DoubleColumnVector) batch.cols[5]).vector[row] = f;
-    ((DoubleColumnVector) batch.cols[6]).vector[row] = d;
-    ((DecimalColumnVector) batch.cols[7]).vector[row].set(de);
-    ((TimestampColumnVector) batch.cols[8]).set(row, t);
-    ((LongColumnVector) batch.cols[9]).vector[row] = dt.getDays();
-    ((BytesColumnVector) batch.cols[10]).setVal(row, str.getBytes());
-    ((BytesColumnVector) batch.cols[11]).setVal(row, c.getBytes());
-    ((BytesColumnVector) batch.cols[12]).setVal(row, vc.getBytes());
-    MapColumnVector map = (MapColumnVector) batch.cols[13];
-    int offset = map.childCount;
-    map.offsets[row] = offset;
-    map.lengths[row] = m.size();
-    map.childCount += map.lengths[row];
-    for(Map.Entry<String, String> entry: m.entrySet()) {
-      ((BytesColumnVector) map.keys).setVal(offset, entry.getKey().getBytes());
-      ((BytesColumnVector) map.values).setVal(offset++,
-          entry.getValue().getBytes());
-    }
-    ListColumnVector list = (ListColumnVector) batch.cols[14];
-    offset = list.childCount;
-    list.offsets[row] = offset;
-    list.lengths[row] = a.size();
-    list.childCount += list.lengths[row];
-    for(int e=0; e < a.size(); ++e) {
-      ((LongColumnVector) list.child).vector[offset + e] = a.get(e);
-    }
-    StructColumnVector struct = (StructColumnVector) batch.cols[15];
-    ((LongColumnVector) struct.fields[0]).vector[row] = sti;
-    ((BytesColumnVector) struct.fields[1]).setVal(row, sts.getBytes());
-  }
-
-  public static void checkOutput(String expected,
-                                 String actual) throws Exception {
-    BufferedReader eStream =
-        new BufferedReader(new FileReader
            (TestJsonFileDump.getFileFromClasspath(expected)));
-    BufferedReader aStream =
-        new BufferedReader(new FileReader(actual));
-    String expectedLine = eStream.readLine().trim();
-    while (expectedLine != null) {
-      String actualLine = aStream.readLine().trim();
-      System.out.println("actual: " + actualLine);
-      System.out.println("expected: " + expectedLine);
-      Assert.assertEquals(expectedLine, actualLine);
-      expectedLine = eStream.readLine();
-      expectedLine = expectedLine == null ? null : expectedLine.trim();
-    }
-    Assert.assertNull(eStream.readLine());
-    Assert.assertNull(aStream.readLine());
-    eStream.close();
-    aStream.close();
-  }
-
-  @Test
-  public void testDump() throws Exception {
-    TypeDescription schema = getMyRecordType();
-    conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .fileSystem(fs)
-            .setSchema(schema)
-            .compress(CompressionKind.ZLIB)
-            .stripeSize(100000)
-            .rowIndexStride(1000));
-    Random r1 = new Random(1);
-    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
-        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
-        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
-        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
-        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
-        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
-        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
-        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
-        "before", "us,", "we", "were", "all", "going", "direct", "to",
-        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
-        "way"};
-    VectorizedRowBatch batch = schema.createRowBatch(1000);
-    for(int i=0; i < 21000; ++i) {
-      appendMyRecord(batch, r1.nextInt(), r1.nextLong(),
-          words[r1.nextInt(words.length)]);
-      if (batch.size == batch.getMaxSize()) {
-        writer.addRowBatch(batch);
-        batch.reset();
-      }
-    }
-    if (batch.size > 0) {
-      writer.addRowBatch(batch);
-    }
-    writer.close();
-    PrintStream origOut = System.out;
-    String outputFilename = "orc-file-dump.out";
-    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
-
-    // replace stdout and run command
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toString(), "--rowindex=1,2,3"});
-    System.out.flush();
-    System.setOut(origOut);
-
-
-    checkOutput(outputFilename, workDir + File.separator + outputFilename);
-  }
-
-  @Test
-  public void testDataDump() throws Exception {
-    TypeDescription schema = getAllTypesType();
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .fileSystem(fs)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .compress(CompressionKind.NONE)
-            .bufferSize(10000)
-            .rowIndexStride(1000));
-    VectorizedRowBatch batch = schema.createRowBatch(1000);
-    Map<String, String> m = new HashMap<String, String>(2);
-    m.put("k1", "v1");
-    appendAllTypes(batch,
-        true,
-        (byte) 10,
-        (short) 100,
-        1000,
-        10000L,
-        4.0f,
-        20.0,
-        new HiveDecimalWritable("4.2222"),
-        new Timestamp(1416967764000L),
-        new DateWritable(new Date(1416967764000L)),
-        "string",
-        "hello",
-        "hello",
-        m,
-        Arrays.asList(100, 200),
-        10, "foo");
-    m.clear();
-    m.put("k3", "v3");
-    appendAllTypes(
-        batch,
-        false,
-        (byte)20,
-        (short)200,
-        2000,
-        20000L,
-        8.0f,
-        40.0,
-        new HiveDecimalWritable("2.2222"),
-        new Timestamp(1416967364000L),
-        new DateWritable(new Date(1411967764000L)),
-        "abcd",
-        "world",
-        "world",
-        m,
-        Arrays.asList(200, 300),
-        20, "bar");
-    writer.addRowBatch(batch);
-
-    writer.close();
-    PrintStream origOut = System.out;
-    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
-
-    // replace stdout and run command
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toString(), "-d"});
-    System.out.flush();
-    System.setOut(origOut);
-    String[] lines = myOut.toString().split("\n");
myOut.toString().split("\n"); - Assert.assertEquals("{\"b\":true,\"bt\":10,\"s\":100,\"i\":1000,\"l\":10000,\"f\":4,\"d\":20,\"de\":\"4.2222\",\"t\":\"2014-11-25 18:09:24.0\",\"dt\":\"2014-11-25\",\"str\":\"string\",\"c\":\"hello\",\"vc\":\"hello\",\"m\":[{\"_key\":\"k1\",\"_value\":\"v1\"}],\"a\":[100,200],\"st\":{\"i\":10,\"s\":\"foo\"}}", lines[0]); - Assert.assertEquals("{\"b\":false,\"bt\":20,\"s\":200,\"i\":2000,\"l\":20000,\"f\":8,\"d\":40,\"de\":\"2.2222\",\"t\":\"2014-11-25 18:02:44.0\",\"dt\":\"2014-09-28\",\"str\":\"abcd\",\"c\":\"world\",\"vc\":\"world\",\"m\":[{\"_key\":\"k3\",\"_value\":\"v3\"}],\"a\":[200,300],\"st\":{\"i\":20,\"s\":\"bar\"}}", lines[1]); - } - - // Test that if the fraction of rows that have distinct strings is greater than the configured - // threshold dictionary encoding is turned off. If dictionary encoding is turned off the length - // of the dictionary stream for the column will be 0 in the ORC file dump. - @Test - public void testDictionaryThreshold() throws Exception { - TypeDescription schema = getMyRecordType(); - Configuration conf = new Configuration(); - conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION"); - conf.setFloat(OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getAttribute(), 0.49f); - Writer writer = OrcFile.createWriter(testFilePath, - OrcFile.writerOptions(conf) - .fileSystem(fs) - .setSchema(schema) - .stripeSize(100000) - .compress(CompressionKind.ZLIB) - .rowIndexStride(1000) - .bufferSize(10000)); - VectorizedRowBatch batch = schema.createRowBatch(1000); - Random r1 = new Random(1); - String[] words = new String[]{"It", "was", "the", "best", "of", "times,", - "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age", - "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it", - "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch", - "of", "incredulity,", "it", "was", "the", "season", "of", "Light,", - "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the", - "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,", - "we", "had", "everything", "before", "us,", "we", "had", "nothing", - "before", "us,", "we", "were", "all", "going", "direct", "to", - "Heaven,", "we", "were", "all", "going", "direct", "the", "other", - "way"}; - int nextInt = 0; - for(int i=0; i < 21000; ++i) { - // Write out the same string twice, this guarantees the fraction of rows with - // distinct strings is 0.5 - if (i % 2 == 0) { - nextInt = r1.nextInt(words.length); - // Append the value of i to the word, this guarantees when an index or word is repeated - // the actual string is unique. 
- words[nextInt] += "-" + i; - } - appendMyRecord(batch, r1.nextInt(), r1.nextLong(), words[nextInt]); - if (batch.size == batch.getMaxSize()) { - writer.addRowBatch(batch); - batch.reset(); - } - } - if (batch.size != 0) { - writer.addRowBatch(batch); - } - writer.close(); - PrintStream origOut = System.out; - String outputFilename = "orc-file-dump-dictionary-threshold.out"; - FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename); - - // replace stdout and run command - System.setOut(new PrintStream(myOut)); - FileDump.main(new String[]{testFilePath.toString(), "--rowindex=1,2,3"}); - System.out.flush(); - System.setOut(origOut); - - checkOutput(outputFilename, workDir + File.separator + outputFilename); - } - - @Test - public void testBloomFilter() throws Exception { - TypeDescription schema = getMyRecordType(); - conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION"); - OrcFile.WriterOptions options = OrcFile.writerOptions(conf) - .fileSystem(fs) - .setSchema(schema) - .stripeSize(100000) - .compress(CompressionKind.ZLIB) - .bufferSize(10000) - .rowIndexStride(1000) - .bloomFilterColumns("S"); - Writer writer = OrcFile.createWriter(testFilePath, options); - Random r1 = new Random(1); - String[] words = new String[]{"It", "was", "the", "best", "of", "times,", - "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age", - "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it", - "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch", - "of", "incredulity,", "it", "was", "the", "season", "of", "Light,", - "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the", - "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,", - "we", "had", "everything", "before", "us,", "we", "had", "nothing", - "before", "us,", "we", "were", "all", "going", "direct", "to", - "Heaven,", "we", "were", "all", "going", "direct", "the", "other", - "way"}; - VectorizedRowBatch batch = schema.createRowBatch(1000); - for(int i=0; i < 21000; ++i) { - appendMyRecord(batch, r1.nextInt(), r1.nextLong(), - words[r1.nextInt(words.length)]); - if (batch.size == batch.getMaxSize()) { - writer.addRowBatch(batch); - batch.reset(); - } - } - if (batch.size > 0) { - writer.addRowBatch(batch); - } - writer.close(); - PrintStream origOut = System.out; - String outputFilename = "orc-file-dump-bloomfilter.out"; - FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename); - - // replace stdout and run command - System.setOut(new PrintStream(myOut)); - FileDump.main(new String[]{testFilePath.toString(), "--rowindex=3"}); - System.out.flush(); - System.setOut(origOut); - - - checkOutput(outputFilename, workDir + File.separator + outputFilename); - } - - @Test - public void testBloomFilter2() throws Exception { - TypeDescription schema = getMyRecordType(); - conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION"); - OrcFile.WriterOptions options = OrcFile.writerOptions(conf) - .fileSystem(fs) - .setSchema(schema) - .stripeSize(100000) - .compress(CompressionKind.ZLIB) - .bufferSize(10000) - .rowIndexStride(1000) - .bloomFilterColumns("l") - .bloomFilterFpp(0.01); - VectorizedRowBatch batch = schema.createRowBatch(1000); - Writer writer = OrcFile.createWriter(testFilePath, options); - Random r1 = new Random(1); - String[] words = new String[]{"It", "was", "the", "best", "of", "times,", - "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age", - "of", "wisdom,", "it", 
"was", "the", "age", "of", "foolishness,", "it", - "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch", - "of", "incredulity,", "it", "was", "the", "season", "of", "Light,", - "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the", - "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,", - "we", "had", "everything", "before", "us,", "we", "had", "nothing", - "before", "us,", "we", "were", "all", "going", "direct", "to", - "Heaven,", "we", "were", "all", "going", "direct", "the", "other", - "way"}; - for(int i=0; i < 21000; ++i) { - appendMyRecord(batch, r1.nextInt(), r1.nextLong(), - words[r1.nextInt(words.length)]); - if (batch.size == batch.getMaxSize()) { - writer.addRowBatch(batch); - batch.reset(); - } - } - if (batch.size > 0) { - writer.addRowBatch(batch); - } - writer.close(); - PrintStream origOut = System.out; - String outputFilename = "orc-file-dump-bloomfilter2.out"; - FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename); - - // replace stdout and run command - System.setOut(new PrintStream(myOut)); - FileDump.main(new String[]{testFilePath.toString(), "--rowindex=2"}); - System.out.flush(); - System.setOut(origOut); - - - checkOutput(outputFilename, workDir + File.separator + outputFilename); - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/test/org/apache/orc/tools/TestJsonFileDump.java ---------------------------------------------------------------------- diff --git a/orc/src/test/org/apache/orc/tools/TestJsonFileDump.java b/orc/src/test/org/apache/orc/tools/TestJsonFileDump.java deleted file mode 100644 index a514824..0000000 --- a/orc/src/test/org/apache/orc/tools/TestJsonFileDump.java +++ /dev/null @@ -1,150 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/test/org/apache/orc/tools/TestJsonFileDump.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/tools/TestJsonFileDump.java b/orc/src/test/org/apache/orc/tools/TestJsonFileDump.java
deleted file mode 100644
index a514824..0000000
--- a/orc/src/test/org/apache/orc/tools/TestJsonFileDump.java
+++ /dev/null
@@ -1,150 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.tools;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.PrintStream;
-import java.net.URL;
-import java.util.Random;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.orc.CompressionKind;
-import org.apache.orc.OrcConf;
-import org.apache.orc.OrcFile;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.Writer;
-import org.junit.Before;
-import org.junit.Test;
-
-public class TestJsonFileDump {
-  public static String getFileFromClasspath(String name) {
-    URL url = ClassLoader.getSystemResource(name);
-    if (url == null) {
-      throw new IllegalArgumentException("Could not find " + name);
-    }
-    return url.getPath();
-  }
-
-  Path workDir = new Path(System.getProperty("test.tmp.dir"));
-  Configuration conf;
-  FileSystem fs;
-  Path testFilePath;
-
-  @Before
-  public void openFileSystem() throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    fs.setWorkingDirectory(workDir);
-    testFilePath = new Path("TestFileDump.testDump.orc");
-    fs.delete(testFilePath, false);
-  }
-
-  static void checkOutput(String expected,
-                          String actual) throws Exception {
-    BufferedReader eStream =
-        new BufferedReader(new FileReader(getFileFromClasspath(expected)));
-    BufferedReader aStream =
-        new BufferedReader(new FileReader(actual));
-    String expectedLine = eStream.readLine();
-    while (expectedLine != null) {
-      String actualLine = aStream.readLine();
-      System.out.println("actual: " + actualLine);
-      System.out.println("expected: " + expectedLine);
-      assertEquals(expectedLine, actualLine);
-      expectedLine = eStream.readLine();
-    }
-    assertNull(eStream.readLine());
-    assertNull(aStream.readLine());
-  }
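checkOutput walks the expected file (resolved from the classpath) and the actual dump line by line, then asserts that both streams are exhausted, so any extra or missing line fails the test. One fragility worth noting in the surrounding tests: System.out is restored only on the success path, so a failure inside FileDump.main would leave stdout redirected for later tests. A more defensive variant of the same capture pattern (a sketch; outputPath is a placeholder):

    PrintStream origOut = System.out;
    try (PrintStream fileOut =
             new PrintStream(new FileOutputStream(outputPath))) {
      System.setOut(fileOut);
      FileDump.main(new String[]{testFilePath.toString(), "--rowindex=3"});
      System.out.flush();
    } finally {
      System.setOut(origOut);  // restore stdout even if FileDump throws
    }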
-
-  @Test
-  public void testJsonDump() throws Exception {
-    TypeDescription schema = TypeDescription.createStruct()
-        .addField("i", TypeDescription.createInt())
-        .addField("l", TypeDescription.createLong())
-        .addField("s", TypeDescription.createString());
-    conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
-    OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
-        .fileSystem(fs)
-        .setSchema(schema)
-        .stripeSize(100000)
-        .compress(CompressionKind.ZLIB)
-        .bufferSize(10000)
-        .rowIndexStride(1000)
-        .bloomFilterColumns("s");
-    Writer writer = OrcFile.createWriter(testFilePath, options);
-    Random r1 = new Random(1);
-    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
-        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
-        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
-        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
-        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
-        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
-        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
-        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
-        "before", "us,", "we", "were", "all", "going", "direct", "to",
-        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
-        "way"};
-    VectorizedRowBatch batch = schema.createRowBatch(1000);
-    for(int i = 0; i < 21000; ++i) {
-      ((LongColumnVector) batch.cols[0]).vector[batch.size] = r1.nextInt();
-      ((LongColumnVector) batch.cols[1]).vector[batch.size] = r1.nextLong();
-      if (i % 100 == 0) {
-        batch.cols[2].noNulls = false;
-        batch.cols[2].isNull[batch.size] = true;
-      } else {
-        ((BytesColumnVector) batch.cols[2]).setVal(batch.size,
-            words[r1.nextInt(words.length)].getBytes());
-      }
-      batch.size += 1;
-      if (batch.size == batch.getMaxSize()) {
-        writer.addRowBatch(batch);
-        batch.reset();
-      }
-    }
-    if (batch.size > 0) {
-      writer.addRowBatch(batch);
-    }
-
-    writer.close();
-    PrintStream origOut = System.out;
-    String outputFilename = "orc-file-dump.json";
-    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
-
-    // replace stdout and run command
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toString(), "-j", "-p", "--rowindex=3"});
-    System.out.flush();
-    System.setOut(origOut);
-
-    checkOutput(outputFilename, workDir + File.separator + outputFilename);
-  }
-}
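The loop above marks every hundredth string value as null by clearing the column's noNulls flag and setting the per-row isNull bit; both steps matter, because readers consult isNull[] only once noNulls is false. The same pattern in isolation (r is a hypothetical row index):

    // Sketch: marking row r of the string column as null in a
    // VectorizedRowBatch.
    ColumnVector vec = batch.cols[2];
    vec.noNulls = false;   // tells readers to start checking isNull[]
    vec.isNull[r] = true;  // the actual per-row null marker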
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/test/resources/orc-file-11-format.orc
----------------------------------------------------------------------
diff --git a/orc/src/test/resources/orc-file-11-format.orc b/orc/src/test/resources/orc-file-11-format.orc
deleted file mode 100644
index 41653c8..0000000
Binary files a/orc/src/test/resources/orc-file-11-format.orc and /dev/null differ

http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/test/resources/orc-file-dump-bloomfilter.out
----------------------------------------------------------------------
diff --git a/orc/src/test/resources/orc-file-dump-bloomfilter.out b/orc/src/test/resources/orc-file-dump-bloomfilter.out
deleted file mode 100644
index 18fd2fb..0000000
--- a/orc/src/test/resources/orc-file-dump-bloomfilter.out
+++ /dev/null
@@ -1,179 +0,0 @@
-Structure for TestFileDump.testDump.orc
-File Version: 0.12 with HIVE_13083
-Rows: 21000
-Compression: ZLIB
-Compression size: 4096
-Type: struct<i:int,l:bigint,s:string>
-
-Stripe Statistics:
-  Stripe 1:
-    Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false min: -2146021688 max: 2147223299 sum: 515792826
-    Column 2: count: 5000 hasNull: false min: -9218592812243954469 max: 9221614132680747961
-    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19280
-  Stripe 2:
-    Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false min: -2146733128 max: 2147001622 sum: 7673427
-    Column 2: count: 5000 hasNull: false min: -9220818777591257749 max: 9222259462014003839
-    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19504
-  Stripe 3:
-    Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false min: -2146993718 max: 2147378179 sum: 132660742551
-    Column 2: count: 5000 hasNull: false min: -9218342074710552826 max: 9222303228623055266
-    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19641
-  Stripe 4:
-    Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false min: -2146658006 max: 2145520931 sum: 8533549236
-    Column 2: count: 5000 hasNull: false min: -9222758097219661129 max: 9221043130193737406
-    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19470
-  Stripe 5:
-    Column 0: count: 1000 hasNull: false
-    Column 1: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 51299706363
-    Column 2: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476
-    Column 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866
-
-File Statistics:
-  Column 0: count: 21000 hasNull: false
-  Column 1: count: 21000 hasNull: false min: -2146993718 max: 2147378179 sum: 193017464403
-  Column 2: count: 21000 hasNull: false min: -9222758097219661129 max: 9222303228623055266
-  Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761
-
-Stripes:
-  Stripe: offset: 3 data: 63786 rows: 5000 tail: 86 index: 951
-    Stream: column 0 section ROW_INDEX start: 3 length 17
-    Stream: column 1 section ROW_INDEX start: 20 length 166
-    Stream: column 2 section ROW_INDEX start: 186 length 169
-    Stream: column 3 section ROW_INDEX start: 355 length 87
-    Stream: column 3 section BLOOM_FILTER start: 442 length 512
-    Stream: column 1 section DATA start: 954 length 20035
-    Stream: column 2 section DATA start: 20989 length 40050
-    Stream: column 3 section DATA start: 61039 length 3543
-    Stream: column 3 section LENGTH start: 64582 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 64607 length 133
-    Encoding column 0: DIRECT
-    Encoding column 1: DIRECT_V2
-    Encoding column 2: DIRECT_V2
-    Encoding column 3: DICTIONARY_V2[35]
-    Row group indices for column 3:
-      Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3862 positions: 0,0,0
-      Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3884 positions: 0,659,149
-      Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3893 positions: 0,1531,3
-      Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3798 positions: 0,2281,32
-      Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3843 positions: 0,3033,45
-    Bloom filters for column 3:
-      Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-  Stripe: offset: 64826 data: 63775 rows: 5000 tail: 86 index: 944
-    Stream: column 0 section ROW_INDEX start: 64826 length 17
-    Stream: column 1 section ROW_INDEX start: 64843 length 164
-    Stream: column 2 section ROW_INDEX start: 65007 length 168
-    Stream: column 3 section ROW_INDEX start: 65175 length 83
-    Stream: column 3 section BLOOM_FILTER start: 65258 length 512
-    Stream: column 1 section DATA start: 65770 length 20035
-    Stream: column 2 section DATA start: 85805 length 40050
-    Stream: column 3 section DATA start: 125855 length 3532
-    Stream: column 3 section LENGTH start: 129387 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 129412 length 133
-    Encoding column 0: DIRECT
-    Encoding column 1: DIRECT_V2
-    Encoding column 2: DIRECT_V2
-    Encoding column 3: DICTIONARY_V2[35]
-    Row group indices for column 3:
-      Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3923 positions: 0,0,0
-      Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3869 positions: 0,761,12
-      Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,1472,70
-      Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3931 positions: 0,2250,43
-      Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3964 positions: 0,2978,88
-    Bloom filters for column 3:
-      Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-  Stripe: offset: 129631 data: 63787 rows: 5000 tail: 86 index: 950
-    Stream: column 0 section ROW_INDEX start: 129631 length 17
-    Stream: column 1 section ROW_INDEX start: 129648 length 163
-    Stream: column 2 section ROW_INDEX start: 129811 length 168
-    Stream: column 3 section ROW_INDEX start: 129979 length 90
-    Stream: column 3 section BLOOM_FILTER start: 130069 length 512
-    Stream: column 1 section DATA start: 130581 length 20035
-    Stream: column 2 section DATA start: 150616 length 40050
-    Stream: column 3 section DATA start: 190666 length 3544
-    Stream: column 3 section LENGTH start: 194210 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 194235 length 133
-    Encoding column 0: DIRECT
-    Encoding column 1: DIRECT_V2
-    Encoding column 2: DIRECT_V2
-    Encoding column 3: DICTIONARY_V2[35]
-    Row group indices for column 3:
-      Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,0,0
-      Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 4008 positions: 0,634,174
-      Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3999 positions: 0,1469,69
-      Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,2133,194
-      Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 4000 positions: 0,3005,43
-    Bloom filters for column 3:
-      Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-  Stripe: offset: 194454 data: 63817 rows: 5000 tail: 86 index: 952
-    Stream: column 0 section ROW_INDEX start: 194454 length 17
-    Stream: column 1 section ROW_INDEX start: 194471 length 165
-    Stream: column 2 section ROW_INDEX start: 194636 length 167
-    Stream: column 3 section ROW_INDEX start: 194803 length 91
-    Stream: column 3 section BLOOM_FILTER start: 194894 length 512
-    Stream: column 1 section DATA start: 195406 length 20035
-    Stream: column 2 section DATA start: 215441 length 40050
-    Stream: column 3 section DATA start: 255491 length 3574
-    Stream: column 3 section LENGTH start: 259065 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 259090 length 133
-    Encoding column 0: DIRECT
-    Encoding column 1: DIRECT_V2
-    Encoding column 2: DIRECT_V2
-    Encoding column 3: DICTIONARY_V2[35]
-    Row group indices for column 3:
-      Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3901 positions: 0,0,0
-      Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3900 positions: 0,431,431
-      Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3909 positions: 0,1485,52
-      Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3947 positions: 0,2196,104
-      Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3813 positions: 0,2934,131
-    Bloom filters for column 3:
-      Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-  Stripe: offset: 259309 data: 12943 rows: 1000 tail: 78 index: 432
-    Stream: column 0 section ROW_INDEX start: 259309 length 12
-    Stream: column 1 section ROW_INDEX start: 259321 length 38
-    Stream: column 2 section ROW_INDEX start: 259359 length 41
-    Stream: column 3 section ROW_INDEX start: 259400 length 40
-    Stream: column 3 section BLOOM_FILTER start: 259440 length 301
-    Stream: column 1 section DATA start: 259741 length 4007
-    Stream: column 2 section DATA start: 263748 length 8010
-    Stream: column 3 section DATA start: 271758 length 768
-    Stream: column 3 section LENGTH start: 272526 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 272551 length 133
-    Encoding column 0: DIRECT
-    Encoding column 1: DIRECT_V2
-    Encoding column 2: DIRECT_V2
-    Encoding column 3: DICTIONARY_V2[35]
-    Row group indices for column 3:
-      Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 positions: 0,0,0
-    Bloom filters for column 3:
-      Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-
-File length: 273307 bytes
-Padding length: 0 bytes
-Padding ratio: 0%
-________________________________________________________________________________________________________________________
-
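A sanity check on the bloom filter lines above: expectedFpp is simply loadFactor raised to numHashFunctions, since a false positive requires all k probed bits to be set and loadFactor is the fraction of set bits. A short verification (a sketch, using the figures from the dump):

    double loadFactor = 138.0 / 6272.0;           // popCount / bitCount, ~0.022
    double expectedFpp = Math.pow(loadFactor, 4); // k = 4 hash functions
    System.out.println(expectedFpp);              // ~2.343647E-7, as dumped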