Repository: hive Updated Branches: refs/heads/master bde615234 -> 169e65592
http://git-wip-us.apache.org/repos/asf/hive/blob/169e6559/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloIndexParameters.java ---------------------------------------------------------------------- diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloIndexParameters.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloIndexParameters.java new file mode 100644 index 0000000..d295c7b --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloIndexParameters.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.accumulo.serde; + +import org.apache.accumulo.core.security.Authorizations; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.accumulo.AccumuloDefaultIndexScanner; +import org.apache.hadoop.hive.accumulo.AccumuloIndexScanner; +import org.apache.hadoop.hive.accumulo.AccumuloIndexScannerException; + +import java.nio.charset.StandardCharsets; +import java.util.HashSet; +import java.util.Set; + + +/** + * Accumulo Index Parameters for Hive tables. + */ +public class AccumuloIndexParameters { + public static final int DEFAULT_MAX_ROWIDS = 20000; + public static final String INDEX_SCANNER = "accumulo.index.scanner"; + public static final String MAX_INDEX_ROWS = "accumulo.index.rows.max"; + public static final String INDEXED_COLUMNS = "accumulo.indexed.columns"; + public static final String INDEXTABLE_NAME = "accumulo.indextable.name"; + private static final Set<String> EMPTY_SET = new HashSet<String>(); + private Configuration conf; + + public AccumuloIndexParameters(Configuration conf) { + this.conf = conf; + } + + public String getIndexTable() { + return this.conf.get(INDEXTABLE_NAME); + } + + public int getMaxIndexRows() { + return this.conf.getInt(MAX_INDEX_ROWS, DEFAULT_MAX_ROWIDS); + } + + public final Set<String> getIndexColumns() { + String colmap = conf.get(INDEXED_COLUMNS); + if (colmap != null) { + Set<String> cols = new HashSet<String>(); + for (String col : colmap.split(",")) { + cols.add(col.trim()); + } + return cols; + } + return EMPTY_SET; + } + + + public final Authorizations getTableAuths() { + String auths = conf.get(AccumuloSerDeParameters.AUTHORIZATIONS_KEY); + if (auths != null && !auths.isEmpty()) { + return new Authorizations(auths.trim().getBytes(StandardCharsets.UTF_8)); + } + return new Authorizations(); + } + + public Configuration getConf() { + return conf; + } + + public final AccumuloIndexScanner createScanner() throws AccumuloIndexScannerException { + AccumuloIndexScanner handler; + + String classname = conf.get(INDEX_SCANNER); + if (classname != null) { + try { + handler = (AccumuloIndexScanner) Class.forName(classname).newInstance(); + } catch (ClassCastException | InstantiationException | IllegalAccessException + | ClassNotFoundException e) { + throw new AccumuloIndexScannerException("Cannot use index scanner class: " + classname, e); + } + } else { + handler = new AccumuloDefaultIndexScanner(); + } + if (handler != null) { + handler.init(conf); + } + return handler; + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/169e6559/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDeParameters.java ---------------------------------------------------------------------- diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDeParameters.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDeParameters.java index 09c5f24..ef454f0 100644 --- a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDeParameters.java +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDeParameters.java @@ -17,9 +17,11 @@ package org.apache.hadoop.hive.accumulo.serde; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.NoSuchElementException; import java.util.Properties; +import java.util.Set; import org.apache.accumulo.core.security.Authorizations; import org.apache.accumulo.core.security.ColumnVisibility; @@ -58,12 +60,21 @@ public class AccumuloSerDeParameters extends AccumuloConnectionParameters { public static final String COMPOSITE_ROWID_FACTORY = "accumulo.composite.rowid.factory"; public static final String COMPOSITE_ROWID_CLASS = "accumulo.composite.rowid"; + public static final int DEFAULT_MAX_ROWIDS = 20000; + public static final String INDEX_SCANNER = "accumulo.index.scanner"; + public static final String MAX_INDEX_ROWS = "accumulo.index.rows.max"; + public static final String INDEXED_COLUMNS = "accumulo.indexed.columns"; + public static final String INDEXTABLE_NAME = "accumulo.indextable.name"; + private static final Set<String> EMPTY_SET = new HashSet<String>(); + + protected final ColumnMapper columnMapper; private Properties tableProperties; private String serdeName; private LazySerDeParameters lazySerDeParameters; + private AccumuloIndexParameters indexParams; private AccumuloRowIdFactory rowIdFactory; public AccumuloSerDeParameters(Configuration conf, Properties tableProperties, String serdeName) @@ -73,6 +84,7 @@ public class AccumuloSerDeParameters extends AccumuloConnectionParameters { this.serdeName = serdeName; lazySerDeParameters = new LazySerDeParameters(conf, tableProperties, serdeName); + indexParams = new AccumuloIndexParameters(conf); // The default encoding for this table when not otherwise specified String defaultStorage = tableProperties.getProperty(DEFAULT_STORAGE_TYPE); @@ -135,10 +147,17 @@ public class AccumuloSerDeParameters extends AccumuloConnectionParameters { return new DefaultAccumuloRowIdFactory(); } + public AccumuloIndexParameters getIndexParams() { + return indexParams; + } + public LazySerDeParameters getSerDeParameters() { + return lazySerDeParameters; } + + public Properties getTableProperties() { return tableProperties; } http://git-wip-us.apache.org/repos/asf/hive/blob/169e6559/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/package-info.java ---------------------------------------------------------------------- diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/package-info.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/package-info.java new file mode 100644 index 0000000..7311e87 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/package-info.java @@ -0,0 +1,4 @@ +/** + * accumulo serde classes + */ +package org.apache.hadoop.hive.accumulo.serde; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/169e6559/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloDefaultIndexScanner.java ---------------------------------------------------------------------- diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloDefaultIndexScanner.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloDefaultIndexScanner.java new file mode 100644 index 0000000..7d6cc0e --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloDefaultIndexScanner.java @@ -0,0 +1,218 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.accumulo; + +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.BatchWriter; +import org.apache.accumulo.core.client.BatchWriterConfig; +import org.apache.accumulo.core.client.Connector; +import org.apache.accumulo.core.client.MutationsRejectedException; +import org.apache.accumulo.core.client.TableExistsException; +import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.accumulo.core.client.mock.MockInstance; +import org.apache.accumulo.core.client.security.tokens.PasswordToken; +import org.apache.accumulo.core.data.Mutation; +import org.apache.accumulo.core.data.Range; +import org.apache.accumulo.core.data.Value; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.accumulo.serde.AccumuloIndexParameters; +import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.io.Text; +import org.junit.Test; +import org.mockito.Mockito; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +public class TestAccumuloDefaultIndexScanner { + private static final Logger LOG = LoggerFactory.getLogger(TestAccumuloDefaultIndexScanner.class); + private static final Value EMPTY_VALUE = new Value(); + + private static void addRow(BatchWriter writer, String rowId, String cf, String cq) throws MutationsRejectedException { + Mutation mut = new Mutation(rowId); + mut.put(new Text(cf), new Text(cq), EMPTY_VALUE); + writer.addMutation(mut); + } + + private static void addRow(BatchWriter writer, Integer rowId, String cf, String cq) throws MutationsRejectedException { + Mutation mut = new Mutation(AccumuloIndexLexicoder.encodeValue(String.valueOf(rowId).getBytes(), "int", true)); + mut.put(new Text(cf), new Text(cq), EMPTY_VALUE); + writer.addMutation(mut); + } + + private static void addRow(BatchWriter writer, boolean rowId, String cf, String cq) throws MutationsRejectedException { + Mutation mut = new Mutation(String.valueOf(rowId)); + mut.put(new Text(cf), new Text(cq), EMPTY_VALUE); + writer.addMutation(mut); + } + + public static AccumuloDefaultIndexScanner buildMockHandler(int maxMatches) { + try { + String table = "table"; + Text emptyText = new Text(""); + Configuration conf = new Configuration(); + conf.set(AccumuloIndexParameters.INDEXTABLE_NAME, table); + conf.setInt(AccumuloIndexParameters.MAX_INDEX_ROWS, maxMatches); + conf.set(AccumuloIndexParameters.INDEXED_COLUMNS, "*"); + conf.set(serdeConstants.LIST_COLUMNS, "rid,name,age,cars,mgr"); + conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowId,name:name,age:age,cars:cars,mgr:mgr"); + AccumuloDefaultIndexScanner handler = new AccumuloDefaultIndexScanner(); + handler.init(conf); + + MockInstance inst = new MockInstance("test_instance"); + Connector conn = inst.getConnector("root", new PasswordToken("")); + if (!conn.tableOperations().exists(table)) { + conn.tableOperations().create(table); + BatchWriterConfig batchConfig = new BatchWriterConfig(); + BatchWriter writer = conn.createBatchWriter(table, batchConfig); + addRow(writer, "fred", "name_name", "row1"); + addRow(writer, "25", "age_age", "row1"); + addRow(writer, 5, "cars_cars", "row1"); + addRow(writer, true, "mgr_mgr", "row1"); + addRow(writer, "bill", "name_name", "row2"); + addRow(writer, "20", "age_age", "row2"); + addRow(writer, 2, "cars_cars", "row2"); + addRow(writer, false, "mgr_mgr", "row2"); + addRow(writer, "sally", "name_name", "row3"); + addRow(writer, "23", "age_age", "row3"); + addRow(writer, 6, "cars_cars", "row3"); + addRow(writer, true, "mgr_mgr", "row3"); + addRow(writer, "rob", "name_name", "row4"); + addRow(writer, "60", "age_age", "row4"); + addRow(writer, 1, "cars_cars", "row4"); + addRow(writer, false, "mgr_mgr", "row4"); + writer.close(); + } + AccumuloConnectionParameters connectionParams = Mockito + .mock(AccumuloConnectionParameters.class); + AccumuloStorageHandler storageHandler = Mockito.mock(AccumuloStorageHandler.class); + + Mockito.when(connectionParams.getConnector()).thenReturn(conn); + handler.setConnectParams(connectionParams); + return handler; + } catch (AccumuloSecurityException | AccumuloException | TableExistsException | TableNotFoundException e) { + LOG.error(e.getLocalizedMessage(), e); + } + return null; + } + + @Test + public void testMatchNone() { + AccumuloDefaultIndexScanner handler = buildMockHandler(10); + List<Range> ranges = handler.getIndexRowRanges("name", new Range("mike")); + assertEquals(0, ranges.size()); + } + + @Test + public void testMatchRange() { + AccumuloDefaultIndexScanner handler = buildMockHandler(10); + List<Range> ranges = handler.getIndexRowRanges("age", new Range("10", "50")); + assertEquals(3, ranges.size()); + assertTrue("does not contain row1", ranges.contains(new Range("row1"))); + assertTrue("does not contain row2", ranges.contains(new Range("row2"))); + assertTrue("does not contain row3", ranges.contains(new Range("row3"))); + } + + public void testTooManyMatches() { + AccumuloDefaultIndexScanner handler = buildMockHandler(2); + List<Range> ranges = handler.getIndexRowRanges("age", new Range("10", "50")); + assertNull("ranges should be null", ranges); + } + + @Test + public void testMatchExact() { + AccumuloDefaultIndexScanner handler = buildMockHandler(10); + List<Range> ranges = handler.getIndexRowRanges("age", new Range("20")); + assertEquals(1, ranges.size()); + assertTrue("does not contain row2", ranges.contains(new Range("row2"))); + } + + @Test + public void testValidIndex() { + Configuration conf = new Configuration(); + conf.set(AccumuloIndexParameters.INDEXED_COLUMNS, "name,age,phone,email"); + conf.set(AccumuloIndexParameters.INDEXTABLE_NAME, "contact"); + AccumuloDefaultIndexScanner handler = new AccumuloDefaultIndexScanner(); + handler.init(conf); + assertTrue("name is not identified as an index", handler.isIndexed("name")); + assertTrue("age is not identified as an index", handler.isIndexed("age")); + assertTrue("phone is not identified as an index", handler.isIndexed("phone")); + assertTrue("email is not identified as an index", handler.isIndexed("email")); + } + + @Test + public void testInvalidIndex() { + Configuration conf = new Configuration(); + conf.set(AccumuloIndexParameters.INDEXED_COLUMNS, "name,age,phone,email"); + conf.set(AccumuloIndexParameters.INDEXTABLE_NAME, "contact"); + AccumuloDefaultIndexScanner handler = new AccumuloDefaultIndexScanner(); + handler.init(conf); + assertFalse("mobile is identified as an index", handler.isIndexed("mobile")); + assertFalse("mail is identified as an index", handler.isIndexed("mail")); + } + + + @Test + public void testMissingTable() { + Configuration conf = new Configuration(); + conf.set(AccumuloIndexParameters.INDEXED_COLUMNS, "name,age,phone,email"); + AccumuloDefaultIndexScanner handler = new AccumuloDefaultIndexScanner(); + handler.init(conf); + assertFalse("name is identified as an index", handler.isIndexed("name")); + assertFalse("age is identified as an index", handler.isIndexed("age")); + } + + @Test + public void testWildcardIndex() { + Configuration conf = new Configuration(); + conf.set(AccumuloIndexParameters.INDEXED_COLUMNS, "*"); + conf.set(AccumuloIndexParameters.INDEXTABLE_NAME, "contact"); + AccumuloDefaultIndexScanner handler = new AccumuloDefaultIndexScanner(); + handler.init(conf); + assertTrue("name is not identified as an index", handler.isIndexed("name")); + assertTrue("age is not identified as an index", handler.isIndexed("age")); + } + + @Test + public void testNullIndex() { + Configuration conf = new Configuration(); + conf.set(AccumuloIndexParameters.INDEXTABLE_NAME, "contact"); + AccumuloDefaultIndexScanner handler = new AccumuloDefaultIndexScanner(); + handler.init(conf); + assertTrue("name is not identified as an index", handler.isIndexed("name")); + } + + @Test + public void testEmptyIndex() { + Configuration conf = new Configuration(); + conf.set(AccumuloIndexParameters.INDEXED_COLUMNS, ""); + conf.set(AccumuloIndexParameters.INDEXTABLE_NAME, "contact"); + AccumuloDefaultIndexScanner handler = new AccumuloDefaultIndexScanner(); + handler.init(conf); + assertFalse("name is identified as an index", handler.isIndexed("name")); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/169e6559/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloIndexLexicoder.java ---------------------------------------------------------------------- diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloIndexLexicoder.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloIndexLexicoder.java new file mode 100644 index 0000000..1eda364 --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloIndexLexicoder.java @@ -0,0 +1,160 @@ +package org.apache.hadoop.hive.accumulo; + +import org.apache.accumulo.core.client.lexicoder.BigIntegerLexicoder; +import org.apache.accumulo.core.client.lexicoder.DoubleLexicoder; +import org.apache.accumulo.core.client.lexicoder.IntegerLexicoder; +import org.apache.accumulo.core.client.lexicoder.LongLexicoder; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.junit.Assert; +import org.junit.Test; + +import java.math.BigInteger; +import java.nio.ByteBuffer; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +/** + * + */ +public class TestAccumuloIndexLexicoder { + + @Test + public void testBooleanString() { + byte[] value = Boolean.TRUE.toString().getBytes(UTF_8); + assertArrayEquals(AccumuloIndexLexicoder.encodeValue(value, serdeConstants.BOOLEAN_TYPE_NAME, + true), value); + } + + @Test + public void testBooleanBinary() { + byte[] value = new byte[] { 1 }; + assertArrayEquals(AccumuloIndexLexicoder.encodeValue(value, serdeConstants.BOOLEAN_TYPE_NAME, + false), Boolean.TRUE.toString().getBytes(UTF_8)); + } + + @Test + public void testIntString() { + byte[] value = "10".getBytes(UTF_8); + byte[] encoded = new IntegerLexicoder().encode(10); + + byte[] lex = AccumuloIndexLexicoder.encodeValue(value, serdeConstants.INT_TYPE_NAME, true); + assertArrayEquals(lex, encoded); + + lex = AccumuloIndexLexicoder.encodeValue(value, serdeConstants.SMALLINT_TYPE_NAME, true); + assertArrayEquals(lex, encoded); + + lex = AccumuloIndexLexicoder.encodeValue(value, serdeConstants.TINYINT_TYPE_NAME, true); + assertArrayEquals(lex, encoded); + } + + @Test + public void testIntBinary() { + byte[] value = ByteBuffer.allocate(4).putInt(10).array(); + byte[] encoded = new IntegerLexicoder().encode(10); + + byte[] lex = AccumuloIndexLexicoder.encodeValue(value, serdeConstants.INT_TYPE_NAME, false); + assertArrayEquals(lex, encoded); + + value = ByteBuffer.allocate(2).putShort((short) 10).array(); + lex = AccumuloIndexLexicoder.encodeValue(value, serdeConstants.SMALLINT_TYPE_NAME, false); + assertArrayEquals(lex, encoded); + + value = ByteBuffer.allocate(1).put((byte)10).array(); + lex = AccumuloIndexLexicoder.encodeValue(value, serdeConstants.TINYINT_TYPE_NAME, false); + assertArrayEquals(lex, encoded); + } + + @Test + public void testFloatBinary() { + byte[] value = ByteBuffer.allocate(4).putFloat(10.55f).array(); + byte[] encoded = new DoubleLexicoder().encode((double)10.55f); + String val = new String(encoded); + + byte[] lex = AccumuloIndexLexicoder.encodeValue(value, serdeConstants.FLOAT_TYPE_NAME, false); + assertArrayEquals(lex, encoded); + + value = ByteBuffer.allocate(8).putDouble(10.55).array(); + encoded = new DoubleLexicoder().encode(10.55); + lex = AccumuloIndexLexicoder.encodeValue(value, serdeConstants.DOUBLE_TYPE_NAME, false); + assertArrayEquals(lex, encoded); + } + + @Test + public void testFloatString() { + byte[] value = "10.55".getBytes(UTF_8); + byte[] encoded = new DoubleLexicoder().encode(10.55); + + byte[] lex = AccumuloIndexLexicoder.encodeValue(value, serdeConstants.FLOAT_TYPE_NAME, true); + assertArrayEquals(lex, encoded); + + lex = AccumuloIndexLexicoder.encodeValue(value, serdeConstants.DOUBLE_TYPE_NAME, true); + assertArrayEquals(lex, encoded); + } + + @Test + public void testBigIntBinary() { + byte[] value = ByteBuffer.allocate(8).putLong(1232322323).array(); + byte[] encoded = new LongLexicoder().encode(1232322323L); + + byte[] lex = AccumuloIndexLexicoder.encodeValue(value, serdeConstants.BIGINT_TYPE_NAME, false); + assertArrayEquals(lex, encoded); + + value = new BigInteger( "1232322323", 10 ).toByteArray(); + encoded = new BigIntegerLexicoder().encode(new BigInteger("1232322323", 10 )); + lex = AccumuloIndexLexicoder.encodeValue(value, serdeConstants.DECIMAL_TYPE_NAME, false); + assertArrayEquals(lex, encoded); + } + + @Test + public void testDecimalString() { + String strVal = "12323232233434"; + byte[] value = strVal.getBytes(UTF_8); + byte[] encoded = new BigIntegerLexicoder().encode(new BigInteger(strVal, 10)); + + byte[] lex = AccumuloIndexLexicoder.encodeValue(value, serdeConstants.DECIMAL_TYPE_NAME, true); + assertArrayEquals(lex, encoded); + + + lex = AccumuloIndexLexicoder.encodeValue(value, "DECIMAL (10,3)", true); + assertArrayEquals(lex, encoded); + } + + @Test + public void testDecimalBinary() { + BigInteger value = new BigInteger("12323232233434", 10); + byte[] encoded = new BigIntegerLexicoder().encode(value); + + byte[] lex = AccumuloIndexLexicoder.encodeValue(value.toByteArray(), serdeConstants.DECIMAL_TYPE_NAME, false); + assertArrayEquals(lex, encoded); + } + + @Test + public void testDateString() { + String date = "2016-02-22"; + byte[] value = date.getBytes(UTF_8); + assertArrayEquals(AccumuloIndexLexicoder.encodeValue(value, serdeConstants.DATE_TYPE_NAME, + true), value); + } + + @Test + public void testDateTimeString() { + String timestamp = "2016-02-22 12:12:06.000000005"; + byte[] value = timestamp.getBytes(UTF_8); + assertArrayEquals(AccumuloIndexLexicoder.encodeValue(value, serdeConstants.TIMESTAMP_TYPE_NAME, + true), value); + } + + @Test + public void testString() { + String strVal = "The quick brown fox"; + byte[] value = strVal.getBytes(UTF_8); + assertArrayEquals(AccumuloIndexLexicoder.encodeValue(value, serdeConstants.STRING_TYPE_NAME, + true), value); + assertArrayEquals(AccumuloIndexLexicoder.encodeValue(value, "varChar(20)", + true), value); + assertArrayEquals(AccumuloIndexLexicoder.encodeValue(value, "CHAR (20)", + true), value); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/169e6559/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloIndexParameters.java ---------------------------------------------------------------------- diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloIndexParameters.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloIndexParameters.java new file mode 100644 index 0000000..976fd27 --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloIndexParameters.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.accumulo; + +import org.apache.accumulo.core.data.Range; +import org.apache.accumulo.core.security.Authorizations; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.accumulo.serde.AccumuloIndexParameters; +import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters; +import org.junit.Test; + +import java.util.List; +import java.util.Set; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +public class TestAccumuloIndexParameters { + + public static class MockAccumuloIndexScanner implements AccumuloIndexScanner { + + @Override + public void init(Configuration conf) { + } + + @Override + public boolean isIndexed(String columnName) { + return false; + } + + @Override + public List<Range> getIndexRowRanges(String column, Range indexRange) { + return null; + } + } + + @Test + public void testDefaultScanner() { + try { + AccumuloIndexScanner scanner = new AccumuloIndexParameters(new Configuration()).createScanner(); + assertTrue(scanner instanceof AccumuloDefaultIndexScanner); + } catch (AccumuloIndexScannerException e) { + fail("Unexpected exception thrown"); + } + } + + @Test + public void testUserHandler() throws AccumuloIndexScannerException { + Configuration conf = new Configuration(); + conf.set(AccumuloIndexParameters.INDEX_SCANNER, MockAccumuloIndexScanner.class.getName()); + AccumuloIndexScanner scanner = new AccumuloIndexParameters(conf).createScanner(); + assertTrue(scanner instanceof MockAccumuloIndexScanner); + } + + @Test + public void testBadHandler() { + Configuration conf = new Configuration(); + conf.set(AccumuloIndexParameters.INDEX_SCANNER, "a.class.does.not.exist.IndexHandler"); + try { + AccumuloIndexScanner scanner = new AccumuloIndexParameters(conf).createScanner(); + } catch (AccumuloIndexScannerException e) { + return; + } + fail("Failed to throw exception for class not found"); + } + + @Test + public void getIndexColumns() { + Configuration conf = new Configuration(); + conf.set(AccumuloIndexParameters.INDEXED_COLUMNS, "a,b,c"); + Set<String> cols = new AccumuloIndexParameters(conf).getIndexColumns(); + assertEquals(3, cols.size()); + assertTrue("Missing column a", cols.contains("a")); + assertTrue("Missing column b", cols.contains("b")); + assertTrue("Missing column c", cols.contains("c")); + } + + @Test + public void getMaxIndexRows() { + Configuration conf = new Configuration(); + conf.setInt(AccumuloIndexParameters.MAX_INDEX_ROWS, 10); + int maxRows = new AccumuloIndexParameters(conf).getMaxIndexRows(); + assertEquals(10, maxRows); + } + + @Test + public void getAuths() { + Configuration conf = new Configuration(); + conf.set(AccumuloSerDeParameters.AUTHORIZATIONS_KEY, "public,open"); + Authorizations auths = new AccumuloIndexParameters(conf).getTableAuths(); + assertEquals(2, auths.size()); + assertTrue("Missing auth public", auths.contains("public")); + assertTrue("Missing auth open", auths.contains("open")); + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/169e6559/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloStorageHandler.java ---------------------------------------------------------------------- diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloStorageHandler.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloStorageHandler.java index 0aaa782..8d195ee 100644 --- a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloStorageHandler.java +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloStorageHandler.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.serde.serdeConstants; import org.junit.Assert; import org.junit.Before; import org.junit.Rule; @@ -59,6 +60,8 @@ public class TestAccumuloStorageHandler { Map<String,String> jobProperties = new HashMap<String,String>(); props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, "cf:cq1,cf:cq2,cf:cq3"); + props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string:int:string"); + props.setProperty(serdeConstants.LIST_COLUMNS, "name,age,email"); props.setProperty(AccumuloSerDeParameters.TABLE_NAME, "table"); props.setProperty(AccumuloSerDeParameters.VISIBILITY_LABEL_KEY, "foo"); http://git-wip-us.apache.org/repos/asf/hive/blob/169e6559/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/TestAccumuloPredicateHandler.java ---------------------------------------------------------------------- diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/TestAccumuloPredicateHandler.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/TestAccumuloPredicateHandler.java index 88e4530..0bb50e8 100644 --- a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/TestAccumuloPredicateHandler.java +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/TestAccumuloPredicateHandler.java @@ -488,6 +488,7 @@ public class TestAccumuloPredicateHandler { TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo); conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames)); conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,int,string"); + conf.set(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, ColumnEncoding.BINARY.getName()); String columnMappingStr = "cf:f1,cf:f2,:rowID"; conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, columnMappingStr); columnMapper = new ColumnMapper(columnMappingStr, ColumnEncoding.STRING.getName(), columnNames, @@ -758,7 +759,7 @@ public class TestAccumuloPredicateHandler { String hiveRowIdColumnName = "rid"; Mockito.when(mockHandler.getRanges(conf, columnMapper)).thenCallRealMethod(); - Mockito.when(mockHandler.generateRanges(columnMapper, hiveRowIdColumnName, root)).thenReturn(null); + Mockito.when(mockHandler.generateRanges(conf, columnMapper, hiveRowIdColumnName, root)).thenReturn(null); Mockito.when(mockHandler.getExpression(conf)).thenReturn(root); // A null result from AccumuloRangeGenerator is all ranges @@ -776,7 +777,8 @@ public class TestAccumuloPredicateHandler { String hiveRowIdColumnName = "rid"; Mockito.when(mockHandler.getRanges(conf, columnMapper)).thenCallRealMethod(); - Mockito.when(mockHandler.generateRanges(columnMapper, hiveRowIdColumnName, root)).thenReturn(Collections.emptyList()); + Mockito.when(mockHandler.generateRanges(conf, columnMapper, hiveRowIdColumnName, root)) + .thenReturn(Collections.emptyList()); Mockito.when(mockHandler.getExpression(conf)).thenReturn(root); // A null result from AccumuloRangeGenerator is all ranges @@ -795,7 +797,7 @@ public class TestAccumuloPredicateHandler { Range r = new Range("a"); Mockito.when(mockHandler.getRanges(conf, columnMapper)).thenCallRealMethod(); - Mockito.when(mockHandler.generateRanges(columnMapper, hiveRowIdColumnName, root)).thenReturn(r); + Mockito.when(mockHandler.generateRanges(conf, columnMapper, hiveRowIdColumnName, root)).thenReturn(r); Mockito.when(mockHandler.getExpression(conf)).thenReturn(root); // A null result from AccumuloRangeGenerator is all ranges @@ -814,7 +816,8 @@ public class TestAccumuloPredicateHandler { Range r1 = new Range("a"), r2 = new Range("z"); Mockito.when(mockHandler.getRanges(conf, columnMapper)).thenCallRealMethod(); - Mockito.when(mockHandler.generateRanges(columnMapper, hiveRowIdColumnName, root)).thenReturn(Arrays.asList(r1, r2)); + Mockito.when(mockHandler.generateRanges(conf, columnMapper, hiveRowIdColumnName, root)) + .thenReturn(Arrays.asList(r1, r2)); Mockito.when(mockHandler.getExpression(conf)).thenReturn(root); // A null result from AccumuloRangeGenerator is all ranges http://git-wip-us.apache.org/repos/asf/hive/blob/169e6559/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/TestAccumuloRangeGenerator.java ---------------------------------------------------------------------- diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/TestAccumuloRangeGenerator.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/TestAccumuloRangeGenerator.java index 339da07..5f3baab 100644 --- a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/TestAccumuloRangeGenerator.java +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/TestAccumuloRangeGenerator.java @@ -16,20 +16,15 @@ */ package org.apache.hadoop.hive.accumulo.predicate; -import static org.junit.Assert.assertNotNull; - -import java.sql.Date; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; - +import com.google.common.collect.Lists; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Range; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.accumulo.AccumuloHiveConstants; +import org.apache.hadoop.hive.accumulo.TestAccumuloDefaultIndexScanner; import org.apache.hadoop.hive.accumulo.columns.ColumnEncoding; import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloRowIdColumnMapping; +import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -42,22 +37,29 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.udf.UDFLike; import org.apache.hadoop.hive.ql.udf.UDFToString; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus; +import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import com.google.common.collect.Lists; +import java.sql.Date; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; + +import static org.junit.Assert.assertNotNull; /** * @@ -66,12 +68,14 @@ public class TestAccumuloRangeGenerator { private AccumuloPredicateHandler handler; private HiveAccumuloRowIdColumnMapping rowIdMapping; + private Configuration conf; @Before public void setup() { handler = AccumuloPredicateHandler.getInstance(); rowIdMapping = new HiveAccumuloRowIdColumnMapping(AccumuloHiveConstants.ROWID, - ColumnEncoding.STRING, "row", TypeInfoFactory.stringTypeInfo.toString()); + ColumnEncoding.STRING,"row", TypeInfoFactory.stringTypeInfo.toString()); + conf = new Configuration(true); } @Test @@ -108,7 +112,7 @@ public class TestAccumuloRangeGenerator { List<Range> expectedRanges = Arrays .asList(new Range(new Key("f"), true, new Key("m\0"), false)); - AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "rid"); + AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(conf, handler, rowIdMapping, "rid"); Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule,NodeProcessor> emptyMap(), null); GraphWalker ogw = new DefaultGraphWalker(disp); @@ -163,7 +167,7 @@ public class TestAccumuloRangeGenerator { // Should generate (-inf,+inf) List<Range> expectedRanges = Arrays.asList(new Range()); - AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "rid"); + AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(conf, handler, rowIdMapping, "rid"); Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule,NodeProcessor> emptyMap(), null); GraphWalker ogw = new DefaultGraphWalker(disp); @@ -236,7 +240,7 @@ public class TestAccumuloRangeGenerator { // Should generate ['q', +inf) List<Range> expectedRanges = Arrays.asList(new Range(new Key("q"), true, null, false)); - AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "rid"); + AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(conf, handler, rowIdMapping, "rid"); Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule,NodeProcessor> emptyMap(), null); GraphWalker ogw = new DefaultGraphWalker(disp); @@ -291,7 +295,7 @@ public class TestAccumuloRangeGenerator { // Should generate [f,+inf) List<Range> expectedRanges = Arrays.asList(new Range(new Key("f"), true, null, false)); - AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "rid"); + AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(conf, handler, rowIdMapping, "rid"); Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule,NodeProcessor> emptyMap(), null); GraphWalker ogw = new DefaultGraphWalker(disp); @@ -349,7 +353,7 @@ public class TestAccumuloRangeGenerator { List<Range> expectedRanges = Arrays.asList(new Range(new Key("2014-01-01"), true, new Key( "2014-07-01"), false)); - AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "rid"); + AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(conf, handler, rowIdMapping, "rid"); Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule,NodeProcessor> emptyMap(), null); GraphWalker ogw = new DefaultGraphWalker(disp); @@ -397,7 +401,7 @@ public class TestAccumuloRangeGenerator { ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), Arrays.asList(key, cast)); - AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "key"); + AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(conf, handler, rowIdMapping, "key"); Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule,NodeProcessor> emptyMap(), null); GraphWalker ogw = new DefaultGraphWalker(disp); @@ -446,7 +450,7 @@ public class TestAccumuloRangeGenerator { ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters); - AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "rid"); + AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(conf, handler, rowIdMapping, "rid"); Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule,NodeProcessor> emptyMap(), null); GraphWalker ogw = new DefaultGraphWalker(disp); @@ -464,4 +468,161 @@ public class TestAccumuloRangeGenerator { Object result = nodeOutput.get(both); Assert.assertNull(result); } + + @Test + public void testRangeOverStringIndexedField() throws Exception { + // age >= '10' + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "age", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "10"); + List<ExprNodeDesc> children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrGreaterThan(), children); + assertNotNull(node); + + // age <= '50' + ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "age", null, + false); + ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "50"); + List<ExprNodeDesc> children2 = Lists.newArrayList(); + children2.add(column2); + children2.add(constant2); + ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrLessThan(), children2); + assertNotNull(node2); + + // And UDF + List<ExprNodeDesc> bothFilters = Lists.newArrayList(); + bothFilters.add(node); + bothFilters.add(node2); + ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPAnd(), bothFilters); + + AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(conf, handler, rowIdMapping, "rid"); + rangeGenerator.setIndexScanner(TestAccumuloDefaultIndexScanner.buildMockHandler(10)); + Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, + Collections.<Rule,NodeProcessor> emptyMap(), null); + GraphWalker ogw = new DefaultGraphWalker(disp); + ArrayList<Node> topNodes = new ArrayList<Node>(); + topNodes.add(both); + HashMap<Node,Object> nodeOutput = new HashMap<Node,Object>(); + + try { + ogw.startWalking(topNodes, nodeOutput); + } catch (SemanticException ex) { + throw new RuntimeException(ex); + } + + // Filters are using an index which should match 3 rows + Object result = nodeOutput.get(both); + if ( result instanceof List) { + List results = (List) result; + Assert.assertEquals(3, results.size()); + Assert.assertTrue("does not contain row1", results.contains(new Range("row1"))); + Assert.assertTrue("does not contain row2", results.contains(new Range("row2"))); + Assert.assertTrue("does not contain row3", results.contains(new Range("row3"))); + } else { + Assert.fail("Results not a list"); + } + } + + @Test + public void testRangeOverIntegerIndexedField() throws Exception { + // cars >= 2 + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "cars", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 2); + List<ExprNodeDesc> children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, + new GenericUDFOPEqualOrGreaterThan(), children); + assertNotNull(node); + + // cars <= 9 + ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "cars", null, + false); + ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 9); + List<ExprNodeDesc> children2 = Lists.newArrayList(); + children2.add(column2); + children2.add(constant2); + ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, + new GenericUDFOPEqualOrLessThan(), children2); + assertNotNull(node2); + + // And UDF + List<ExprNodeDesc> bothFilters = Lists.newArrayList(); + bothFilters.add(node); + bothFilters.add(node2); + ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPAnd(), bothFilters); + + AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(conf, handler, rowIdMapping, "rid"); + rangeGenerator.setIndexScanner(TestAccumuloDefaultIndexScanner.buildMockHandler(10)); + Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, + Collections.<Rule,NodeProcessor> emptyMap(), null); + GraphWalker ogw = new DefaultGraphWalker(disp); + ArrayList<Node> topNodes = new ArrayList<Node>(); + topNodes.add(both); + HashMap<Node,Object> nodeOutput = new HashMap<Node,Object>(); + + try { + ogw.startWalking(topNodes, nodeOutput); + } catch (SemanticException ex) { + throw new RuntimeException(ex); + } + + // Filters are using an index which should match 3 rows + Object result = nodeOutput.get(both); + if ( result instanceof List) { + List results = (List) result; + Assert.assertEquals(3, results.size()); + Assert.assertTrue("does not contain row1", results.contains(new Range("row1"))); + Assert.assertTrue("does not contain row2", results.contains(new Range("row2"))); + Assert.assertTrue("does not contain row3", results.contains(new Range("row3"))); + } else { + Assert.fail("Results not a list"); + } + } + + @Test + public void testRangeOverBooleanIndexedField() throws Exception { + // mgr == true + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.booleanTypeInfo, "mgr", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, true); + List<ExprNodeDesc> children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, + new GenericUDFOPEqual(), children); + assertNotNull(node); + + AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(conf, handler, rowIdMapping, "rid"); + rangeGenerator.setIndexScanner(TestAccumuloDefaultIndexScanner.buildMockHandler(10)); + Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, + Collections.<Rule,NodeProcessor> emptyMap(), null); + GraphWalker ogw = new DefaultGraphWalker(disp); + ArrayList<Node> topNodes = new ArrayList<Node>(); + topNodes.add(node); + HashMap<Node,Object> nodeOutput = new HashMap<Node,Object>(); + + try { + ogw.startWalking(topNodes, nodeOutput); + } catch (SemanticException ex) { + throw new RuntimeException(ex); + } + + // Filters are using an index which should match 2 rows + Object result = nodeOutput.get(node); + if ( result instanceof List) { + List results = (List) result; + Assert.assertEquals(2, results.size()); + Assert.assertTrue("does not contain row1", results.contains( new Range( "row1"))); + Assert.assertTrue("does not contain row3", results.contains( new Range( "row3"))); + } + else { + Assert.fail("Results not a list"); + } + } + } http://git-wip-us.apache.org/repos/asf/hive/blob/169e6559/accumulo-handler/src/test/queries/positive/accumulo_index.q ---------------------------------------------------------------------- diff --git a/accumulo-handler/src/test/queries/positive/accumulo_index.q b/accumulo-handler/src/test/queries/positive/accumulo_index.q new file mode 100644 index 0000000..52a33af --- /dev/null +++ b/accumulo-handler/src/test/queries/positive/accumulo_index.q @@ -0,0 +1,44 @@ +DROP TABLE accumulo_index_test; + +CREATE TABLE accumulo_index_test ( + rowid string, + active boolean, + num_offices tinyint, + num_personel smallint, + total_manhours int, + num_shareholders bigint, + eff_rating float, + err_rating double, + yearly_production decimal, + start_date date, + address varchar(100), + phone char(13), + last_update timestamp ) +ROW FORMAT SERDE 'org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe' +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( + "accumulo.columns.mapping" = ":rowID,a:act,a:off,a:per,a:mhs,a:shs,a:eff,a:err,a:yp,a:sd,a:addr,a:ph,a:lu", + "accumulo.table.name"="accumulo_index_test", + "accumulo.indexed.columns"="*", + "accumulo.indextable.name"="accumulo_index_idx" + ); + + +insert into accumulo_index_test values( "row1", true, 55, 107, 555555, 1223232332, + 4.5, 0.8, 1232223, "2001-10-10", "123 main street", + "555-555-5555", "2016-02-22 12:45:07.000000000"); + +select * from accumulo_index_test where active = 'true'; +select * from accumulo_index_test where num_offices = 55; +select * from accumulo_index_test where num_personel = 107; +select * from accumulo_index_test where total_manhours < 555556; +select * from accumulo_index_test where num_shareholders >= 1223232331; +select * from accumulo_index_test where eff_rating <= 4.5; +select * from accumulo_index_test where err_rating >= 0.8; +select * from accumulo_index_test where yearly_production = 1232223; +select * from accumulo_index_test where start_date = "2001-10-10"; +select * from accumulo_index_test where address >= "100 main street"; +select * from accumulo_index_test where phone <= "555-555-5555"; +select * from accumulo_index_test where last_update >= "2016-02-22 12:45:07"; + +DROP TABLE accumulo_index_test; http://git-wip-us.apache.org/repos/asf/hive/blob/169e6559/accumulo-handler/src/test/results/positive/accumulo_index.q.out ---------------------------------------------------------------------- diff --git a/accumulo-handler/src/test/results/positive/accumulo_index.q.out b/accumulo-handler/src/test/results/positive/accumulo_index.q.out new file mode 100644 index 0000000..5cb3d73 --- /dev/null +++ b/accumulo-handler/src/test/results/positive/accumulo_index.q.out @@ -0,0 +1,180 @@ +PREHOOK: query: DROP TABLE accumulo_index_test +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE accumulo_index_test +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE accumulo_index_test ( + rowid string, + active boolean, + num_offices tinyint, + num_personel smallint, + total_manhours int, + num_shareholders bigint, + eff_rating float, + err_rating double, + yearly_production decimal, + start_date date, + address varchar(100), + phone char(13), + last_update timestamp ) +ROW FORMAT SERDE 'org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe' +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( + "accumulo.columns.mapping" = ":rowID,a:act,a:off,a:per,a:mhs,a:shs,a:eff,a:err,a:yp,a:sd,a:addr,a:ph,a:lu", + "accumulo.table.name"="accumulo_index_test", + "accumulo.indexed.columns"="*", + "accumulo.indextable.name"="accumulo_index_idx" + ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@accumulo_index_test +POSTHOOK: query: CREATE TABLE accumulo_index_test ( + rowid string, + active boolean, + num_offices tinyint, + num_personel smallint, + total_manhours int, + num_shareholders bigint, + eff_rating float, + err_rating double, + yearly_production decimal, + start_date date, + address varchar(100), + phone char(13), + last_update timestamp ) +ROW FORMAT SERDE 'org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe' +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( + "accumulo.columns.mapping" = ":rowID,a:act,a:off,a:per,a:mhs,a:shs,a:eff,a:err,a:yp,a:sd,a:addr,a:ph,a:lu", + "accumulo.table.name"="accumulo_index_test", + "accumulo.indexed.columns"="*", + "accumulo.indextable.name"="accumulo_index_idx" + ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@accumulo_index_test +PREHOOK: query: insert into accumulo_index_test values( "row1", true, 55, 107, 555555, 1223232332, + 4.5, 0.8, 1232223, "2001-10-10", "123 main street", + "555-555-5555", "2016-02-22 12:45:07.000000000") +PREHOOK: type: QUERY +PREHOOK: Output: default@accumulo_index_test +POSTHOOK: query: insert into accumulo_index_test values( "row1", true, 55, 107, 555555, 1223232332, + 4.5, 0.8, 1232223, "2001-10-10", "123 main street", + "555-555-5555", "2016-02-22 12:45:07.000000000") +POSTHOOK: type: QUERY +POSTHOOK: Output: default@accumulo_index_test +PREHOOK: query: select * from accumulo_index_test where active = 'true' +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_index_test where active = 'true' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +row1 true 55 107 555555 1223232332 4.5 0.8 1232223 2001-10-10 123 main street 555-555-5555 2016-02-22 12:45:07 +PREHOOK: query: select * from accumulo_index_test where num_offices = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_index_test where num_offices = 55 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +row1 true 55 107 555555 1223232332 4.5 0.8 1232223 2001-10-10 123 main street 555-555-5555 2016-02-22 12:45:07 +PREHOOK: query: select * from accumulo_index_test where num_personel = 107 +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_index_test where num_personel = 107 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +row1 true 55 107 555555 1223232332 4.5 0.8 1232223 2001-10-10 123 main street 555-555-5555 2016-02-22 12:45:07 +PREHOOK: query: select * from accumulo_index_test where total_manhours < 555556 +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_index_test where total_manhours < 555556 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +row1 true 55 107 555555 1223232332 4.5 0.8 1232223 2001-10-10 123 main street 555-555-5555 2016-02-22 12:45:07 +PREHOOK: query: select * from accumulo_index_test where num_shareholders >= 1223232331 +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_index_test where num_shareholders >= 1223232331 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +row1 true 55 107 555555 1223232332 4.5 0.8 1232223 2001-10-10 123 main street 555-555-5555 2016-02-22 12:45:07 +PREHOOK: query: select * from accumulo_index_test where eff_rating <= 4.5 +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_index_test where eff_rating <= 4.5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +row1 true 55 107 555555 1223232332 4.5 0.8 1232223 2001-10-10 123 main street 555-555-5555 2016-02-22 12:45:07 +PREHOOK: query: select * from accumulo_index_test where err_rating >= 0.8 +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_index_test where err_rating >= 0.8 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +row1 true 55 107 555555 1223232332 4.5 0.8 1232223 2001-10-10 123 main street 555-555-5555 2016-02-22 12:45:07 +PREHOOK: query: select * from accumulo_index_test where yearly_production = 1232223 +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_index_test where yearly_production = 1232223 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +row1 true 55 107 555555 1223232332 4.5 0.8 1232223 2001-10-10 123 main street 555-555-5555 2016-02-22 12:45:07 +PREHOOK: query: select * from accumulo_index_test where start_date = "2001-10-10" +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_index_test where start_date = "2001-10-10" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +row1 true 55 107 555555 1223232332 4.5 0.8 1232223 2001-10-10 123 main street 555-555-5555 2016-02-22 12:45:07 +PREHOOK: query: select * from accumulo_index_test where address >= "100 main street" +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_index_test where address >= "100 main street" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +row1 true 55 107 555555 1223232332 4.5 0.8 1232223 2001-10-10 123 main street 555-555-5555 2016-02-22 12:45:07 +PREHOOK: query: select * from accumulo_index_test where phone <= "555-555-5555" +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_index_test where phone <= "555-555-5555" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +row1 true 55 107 555555 1223232332 4.5 0.8 1232223 2001-10-10 123 main street 555-555-5555 2016-02-22 12:45:07 +PREHOOK: query: select * from accumulo_index_test where last_update >= "2016-02-22 12:45:07" +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_index_test where last_update >= "2016-02-22 12:45:07" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_index_test +#### A masked pattern was here #### +row1 true 55 107 555555 1223232332 4.5 0.8 1232223 2001-10-10 123 main street 555-555-5555 2016-02-22 12:45:07 +PREHOOK: query: DROP TABLE accumulo_index_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@accumulo_index_test +PREHOOK: Output: default@accumulo_index_test +POSTHOOK: query: DROP TABLE accumulo_index_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@accumulo_index_test +POSTHOOK: Output: default@accumulo_index_test