http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringDictionary.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringDictionary.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringDictionary.java deleted file mode 100644 index 41a211b..0000000 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringDictionary.java +++ /dev/null @@ -1,261 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.io.orc; - -import static org.junit.Assert.assertEquals; - -import java.io.File; -import java.util.Random; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.io.Text; -import org.apache.orc.CompressionKind; -import org.apache.orc.OrcProto; - -import org.apache.orc.StripeInformation; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TestName; - -public class TestStringDictionary { - - Path workDir = new Path(System.getProperty("test.tmp.dir", "target" + File.separator + "test" - + File.separator + "tmp")); - - Configuration conf; - FileSystem fs; - Path testFilePath; - - @Rule - public TestName testCaseName = new TestName(); - - @Before - public void openFileSystem() throws Exception { - conf = new Configuration(); - fs = FileSystem.getLocal(conf); - testFilePath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc"); - fs.delete(testFilePath, false); - } - - @Test - public void testTooManyDistinct() throws Exception { - ObjectInspector inspector; - synchronized (TestOrcFile.class) { - inspector = ObjectInspectorFactory.getReflectionObjectInspector(Text.class, - ObjectInspectorFactory.ObjectInspectorOptions.JAVA); - } - - Writer writer = OrcFile.createWriter( - testFilePath, - OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.NONE) - .bufferSize(10000)); - for (int i = 0; i < 20000; i++) { - writer.addRow(new Text(String.valueOf(i))); - } - writer.close(); - - Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); - RecordReader rows = reader.rows(); - int idx = 0; - while (rows.hasNext()) { - Object row = rows.next(null); - assertEquals(new Text(String.valueOf(idx++)), row); - } - - // make sure the encoding type is correct - for (StripeInformation stripe : reader.getStripes()) { - // hacky but does the job, this casting will work as long this test resides - // within the same package as ORC reader - OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe); - for (int i = 0; i < footer.getColumnsCount(); ++i) { - OrcProto.ColumnEncoding encoding = footer.getColumns(i); - assertEquals(OrcProto.ColumnEncoding.Kind.DIRECT_V2, encoding.getKind()); - } - } - } - - @Test - public void testHalfDistinct() throws Exception { - ObjectInspector inspector; - synchronized (TestOrcFile.class) { - inspector = ObjectInspectorFactory.getReflectionObjectInspector(Text.class, - ObjectInspectorFactory.ObjectInspectorOptions.JAVA); - } - - Writer writer = OrcFile.createWriter( - testFilePath, - OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.NONE) - .bufferSize(10000)); - Random rand = new Random(123); - int[] input = new int[20000]; - for (int i = 0; i < 20000; i++) { - input[i] = rand.nextInt(10000); - } - - for (int i = 0; i < 20000; i++) { - writer.addRow(new Text(String.valueOf(input[i]))); - } - writer.close(); - - Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); - RecordReader rows = reader.rows(); - int idx = 0; - while (rows.hasNext()) { - Object row = rows.next(null); - assertEquals(new Text(String.valueOf(input[idx++])), row); - } - - // make sure the encoding type is correct - for (StripeInformation stripe : reader.getStripes()) { - // hacky but does the job, this casting will work as long this test resides - // within the same package as ORC reader - OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe); - for (int i = 0; i < footer.getColumnsCount(); ++i) { - OrcProto.ColumnEncoding encoding = footer.getColumns(i); - assertEquals(OrcProto.ColumnEncoding.Kind.DICTIONARY_V2, encoding.getKind()); - } - } - } - - @Test - public void testTooManyDistinctCheckDisabled() throws Exception { - ObjectInspector inspector; - synchronized (TestOrcFile.class) { - inspector = ObjectInspectorFactory.getReflectionObjectInspector(Text.class, - ObjectInspectorFactory.ObjectInspectorOptions.JAVA); - } - - conf.setBoolean(ConfVars.HIVE_ORC_ROW_INDEX_STRIDE_DICTIONARY_CHECK.varname, false); - Writer writer = OrcFile.createWriter( - testFilePath, - OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.NONE) - .bufferSize(10000)); - for (int i = 0; i < 20000; i++) { - writer.addRow(new Text(String.valueOf(i))); - } - writer.close(); - - Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); - RecordReader rows = reader.rows(); - int idx = 0; - while (rows.hasNext()) { - Object row = rows.next(null); - assertEquals(new Text(String.valueOf(idx++)), row); - } - - // make sure the encoding type is correct - for (StripeInformation stripe : reader.getStripes()) { - // hacky but does the job, this casting will work as long this test resides - // within the same package as ORC reader - OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe); - for (int i = 0; i < footer.getColumnsCount(); ++i) { - OrcProto.ColumnEncoding encoding = footer.getColumns(i); - assertEquals(OrcProto.ColumnEncoding.Kind.DIRECT_V2, encoding.getKind()); - } - } - } - - @Test - public void testHalfDistinctCheckDisabled() throws Exception { - ObjectInspector inspector; - synchronized (TestOrcFile.class) { - inspector = ObjectInspectorFactory.getReflectionObjectInspector(Text.class, - ObjectInspectorFactory.ObjectInspectorOptions.JAVA); - } - - conf.setBoolean(ConfVars.HIVE_ORC_ROW_INDEX_STRIDE_DICTIONARY_CHECK.varname, false); - Writer writer = OrcFile.createWriter( - testFilePath, - OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.NONE) - .bufferSize(10000)); - Random rand = new Random(123); - int[] input = new int[20000]; - for (int i = 0; i < 20000; i++) { - input[i] = rand.nextInt(10000); - } - - for (int i = 0; i < 20000; i++) { - writer.addRow(new Text(String.valueOf(input[i]))); - } - writer.close(); - - Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); - RecordReader rows = reader.rows(); - int idx = 0; - while (rows.hasNext()) { - Object row = rows.next(null); - assertEquals(new Text(String.valueOf(input[idx++])), row); - } - - // make sure the encoding type is correct - for (StripeInformation stripe : reader.getStripes()) { - // hacky but does the job, this casting will work as long this test resides - // within the same package as ORC reader - OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe); - for (int i = 0; i < footer.getColumnsCount(); ++i) { - OrcProto.ColumnEncoding encoding = footer.getColumns(i); - assertEquals(OrcProto.ColumnEncoding.Kind.DICTIONARY_V2, encoding.getKind()); - } - } - } - - @Test - public void testTooManyDistinctV11AlwaysDictionary() throws Exception { - ObjectInspector inspector; - synchronized (TestOrcFile.class) { - inspector = ObjectInspectorFactory.getReflectionObjectInspector(Text.class, - ObjectInspectorFactory.ObjectInspectorOptions.JAVA); - } - - Writer writer = OrcFile.createWriter( - testFilePath, - OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.NONE) - .version(OrcFile.Version.V_0_11).bufferSize(10000)); - for (int i = 0; i < 20000; i++) { - writer.addRow(new Text(String.valueOf(i))); - } - writer.close(); - - Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); - RecordReader rows = reader.rows(); - int idx = 0; - while (rows.hasNext()) { - Object row = rows.next(null); - assertEquals(new Text(String.valueOf(idx++)), row); - } - - // make sure the encoding type is correct - for (StripeInformation stripe : reader.getStripes()) { - // hacky but does the job, this casting will work as long this test resides - // within the same package as ORC reader - OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe); - for (int i = 0; i < footer.getColumnsCount(); ++i) { - OrcProto.ColumnEncoding encoding = footer.getColumns(i); - assertEquals(OrcProto.ColumnEncoding.Kind.DICTIONARY, encoding.getKind()); - } - } - - } - -}
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java deleted file mode 100644 index 96af65a..0000000 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java +++ /dev/null @@ -1,68 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.io.orc; - -import static org.junit.Assert.assertEquals; - -import org.apache.orc.TypeDescription; -import org.junit.Test; - -public class TestTypeDescription { - - @Test - public void testJson() { - TypeDescription bin = TypeDescription.createBinary(); - assertEquals("{\"category\": \"binary\", \"id\": 0, \"max\": 0}", - bin.toJson()); - assertEquals("binary", bin.toString()); - TypeDescription struct = TypeDescription.createStruct() - .addField("f1", TypeDescription.createInt()) - .addField("f2", TypeDescription.createString()) - .addField("f3", TypeDescription.createDecimal()); - assertEquals("struct<f1:int,f2:string,f3:decimal(38,10)>", - struct.toString()); - assertEquals("{\"category\": \"struct\", \"id\": 0, \"max\": 3, \"fields\": [\n" - + " \"f1\": {\"category\": \"int\", \"id\": 1, \"max\": 1},\n" - + " \"f2\": {\"category\": \"string\", \"id\": 2, \"max\": 2},\n" - + " \"f3\": {\"category\": \"decimal\", \"id\": 3, \"max\": 3, \"precision\": 38, \"scale\": 10}]}", - struct.toJson()); - struct = TypeDescription.createStruct() - .addField("f1", TypeDescription.createUnion() - .addUnionChild(TypeDescription.createByte()) - .addUnionChild(TypeDescription.createDecimal() - .withPrecision(20).withScale(10))) - .addField("f2", TypeDescription.createStruct() - .addField("f3", TypeDescription.createDate()) - .addField("f4", TypeDescription.createDouble()) - .addField("f5", TypeDescription.createBoolean())) - .addField("f6", TypeDescription.createChar().withMaxLength(100)); - assertEquals("struct<f1:uniontype<tinyint,decimal(20,10)>,f2:struct<f3:date,f4:double,f5:boolean>,f6:char(100)>", - struct.toString()); - assertEquals( - "{\"category\": \"struct\", \"id\": 0, \"max\": 8, \"fields\": [\n" + - " \"f1\": {\"category\": \"uniontype\", \"id\": 1, \"max\": 3, \"children\": [\n" + - " {\"category\": \"tinyint\", \"id\": 2, \"max\": 2},\n" + - " {\"category\": \"decimal\", \"id\": 3, \"max\": 3, \"precision\": 20, \"scale\": 10}]},\n" + - " \"f2\": {\"category\": \"struct\", \"id\": 4, \"max\": 7, \"fields\": [\n" + - " \"f3\": {\"category\": \"date\", \"id\": 5, \"max\": 5},\n" + - " \"f4\": {\"category\": \"double\", \"id\": 6, \"max\": 6},\n" + - " \"f5\": {\"category\": \"boolean\", \"id\": 7, \"max\": 7}]},\n" + - " \"f6\": {\"category\": \"char\", \"id\": 8, \"max\": 8, \"length\": 100}]}", - struct.toJson()); - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestUnrolledBitPack.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestUnrolledBitPack.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestUnrolledBitPack.java deleted file mode 100644 index 3251731..0000000 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestUnrolledBitPack.java +++ /dev/null @@ -1,114 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.io.orc; - -import static org.junit.Assert.assertEquals; - -import java.io.File; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.io.LongWritable; -import org.apache.orc.CompressionKind; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TestName; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -import com.google.common.collect.Lists; -import com.google.common.primitives.Longs; - -@RunWith(value = Parameterized.class) -public class TestUnrolledBitPack { - - private long val; - - public TestUnrolledBitPack(long val) { - this.val = val; - } - - @Parameters - public static Collection<Object[]> data() { - Object[][] data = new Object[][] { { -1 }, { 1 }, { 7 }, { -128 }, { 32000 }, { 8300000 }, - { Integer.MAX_VALUE }, { 540000000000L }, { 140000000000000L }, { 36000000000000000L }, - { Long.MAX_VALUE } }; - return Arrays.asList(data); - } - - Path workDir = new Path(System.getProperty("test.tmp.dir", "target" + File.separator + "test" - + File.separator + "tmp")); - - Configuration conf; - FileSystem fs; - Path testFilePath; - - @Rule - public TestName testCaseName = new TestName(); - - @Before - public void openFileSystem() throws Exception { - conf = new Configuration(); - fs = FileSystem.getLocal(conf); - testFilePath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc"); - fs.delete(testFilePath, false); - } - - @Test - public void testBitPacking() throws Exception { - ObjectInspector inspector; - synchronized (TestOrcFile.class) { - inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class, - ObjectInspectorFactory.ObjectInspectorOptions.JAVA); - } - - long[] inp = new long[] { val, 0, val, val, 0, val, 0, val, val, 0, val, 0, val, val, 0, 0, - val, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val, - 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0, - 0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, - val, 0, val, 0, 0, val, 0, val, 0, 0, val, val }; - List<Long> input = Lists.newArrayList(Longs.asList(inp)); - - Writer writer = OrcFile.createWriter( - testFilePath, - OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000) - .compress(CompressionKind.NONE).bufferSize(10000)); - for (Long l : input) { - writer.addRow(l); - } - writer.close(); - - Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); - RecordReader rows = reader.rows(); - int idx = 0; - while (rows.hasNext()) { - Object row = rows.next(null); - assertEquals(input.get(idx++).longValue(), ((LongWritable) row).get()); - } - } - -}