http://git-wip-us.apache.org/repos/asf/hive/blob/b0d3cb45/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 2152f00..a45cac6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -19,11 +19,6 @@ package org.apache.hadoop.hive.ql.metadata; import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE; -import static org.apache.hadoop.hive.serde.serdeConstants.COLLECTION_DELIM; -import static org.apache.hadoop.hive.serde.serdeConstants.ESCAPE_CHAR; -import static org.apache.hadoop.hive.serde.serdeConstants.FIELD_DELIM; -import static org.apache.hadoop.hive.serde.serdeConstants.LINE_DELIM; -import static org.apache.hadoop.hive.serde.serdeConstants.MAPKEY_DELIM; import static org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT; import static org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME; @@ -108,7 +103,6 @@ import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalResponse; import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege; import org.apache.hadoop.hive.metastore.api.HiveObjectRef; import org.apache.hadoop.hive.metastore.api.HiveObjectType; -import org.apache.hadoop.hive.metastore.api.Index; import org.apache.hadoop.hive.metastore.api.InsertEventRequestData; import org.apache.hadoop.hive.metastore.api.InvalidOperationException; import org.apache.hadoop.hive.metastore.api.Materialization; @@ -116,7 +110,6 @@ import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.MetadataPpdResult; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.NotNullConstraintsRequest; -import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.metastore.api.PrimaryKeysRequest; import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet; import org.apache.hadoop.hive.metastore.api.PrincipalType; @@ -127,11 +120,9 @@ import org.apache.hadoop.hive.metastore.api.SQLForeignKey; import org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint; import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; import org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint; -import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest; import org.apache.hadoop.hive.metastore.api.ShowCompactResponse; import org.apache.hadoop.hive.metastore.api.SkewedInfo; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.UniqueConstraintsRequest; import org.apache.hadoop.hive.metastore.api.WMFullResourcePlan; import org.apache.hadoop.hive.metastore.api.WMMapping; @@ -150,7 +141,6 @@ import org.apache.hadoop.hive.ql.exec.FunctionTask; import org.apache.hadoop.hive.ql.exec.FunctionUtils; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.index.HiveIndexHandler; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; import org.apache.hadoop.hive.ql.log.PerfLogger; @@ -668,34 +658,6 @@ public class Hive { } } - public void alterIndex(String baseTableName, String indexName, Index newIdx) - 
throws InvalidOperationException, HiveException { - String[] names = Utilities.getDbTableName(baseTableName); - alterIndex(names[0], names[1], indexName, newIdx); - } - - /** - * Updates the existing index metadata with the new metadata. - * - * @param idxName - * name of the existing index - * @param newIdx - * the new index metadata; the name may be unchanged - * @throws InvalidOperationException - * if the changes in metadata are not acceptable - * @throws TException - */ - public void alterIndex(String dbName, String baseTblName, String idxName, Index newIdx) - throws InvalidOperationException, HiveException { - try { - getMSC().alter_index(dbName, baseTblName, idxName, newIdx); - } catch (MetaException e) { - throw new HiveException("Unable to alter index. " + e.getMessage(), e); - } catch (TException e) { - throw new HiveException("Unable to alter index. " + e.getMessage(), e); - } - } - /** * Updates the existing partition metadata with the new metadata. * @@ -934,243 +896,6 @@ public class Hive { } /** - * - * @param tableName - * table name - * @param indexName - * index name - * @param indexHandlerClass - * index handler class - * @param indexedCols - * index columns - * @param indexTblName - * index table's name - * @param deferredRebuild - * whether to defer building the index table's data - * @param inputFormat - * input format - * @param outputFormat - * output format - * @param serde - * @param storageHandler - * index table's storage handler - * @param location - * location - * @param idxProps - * index properties - * @param serdeProps - * serde properties - * @param collItemDelim - * @param fieldDelim - * @param fieldEscape - * @param lineDelim - * @param mapKeyDelim - * @throws HiveException - */ - public void createIndex(String tableName, String indexName, String indexHandlerClass, - List<String> indexedCols, String indexTblName, boolean deferredRebuild, - String inputFormat, String outputFormat, String serde, - String storageHandler, String location, - Map<String, String> idxProps, Map<String, String> tblProps, Map<String, String> serdeProps, - String collItemDelim, String fieldDelim, String fieldEscape, - String lineDelim, String mapKeyDelim, String indexComment) - throws HiveException { - - try { - String tdname = Utilities.getDatabaseName(tableName); - String idname = Utilities.getDatabaseName(indexTblName); - if (!idname.equals(tdname)) { - throw new HiveException("Index on different database (" + idname - + ") from base table (" + tdname + ") is not supported."); - } - - Index old_index = null; - try { - old_index = getIndex(tableName, indexName); - } catch (Exception e) { - } - if (old_index != null) { - throw new HiveException("Index " + indexName + " already exists on table " + tableName); - } - - org.apache.hadoop.hive.metastore.api.Table baseTbl = getTable(tableName).getTTable(); - if (TableType.VIRTUAL_VIEW.toString().equals(baseTbl.getTableType())) { - throw new HiveException("tableName="+ tableName +" is a VIRTUAL VIEW. Index on VIRTUAL VIEW is not supported."); - } - if (baseTbl.isTemporary()) { - throw new HiveException("tableName=" + tableName - + " is a TEMPORARY TABLE. Index on TEMPORARY TABLE is not supported."); - } - - org.apache.hadoop.hive.metastore.api.Table temp = null; - try { - temp = getTable(indexTblName).getTTable(); - } catch (Exception e) { - } - if (temp != null) { - throw new HiveException("Table name " + indexTblName + " already exists. 
Choose another name."); - } - - SerDeInfo serdeInfo = new SerDeInfo(); - serdeInfo.setName(indexTblName); - - if(serde != null) { - serdeInfo.setSerializationLib(serde); - } else { - if (storageHandler == null) { - serdeInfo.setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName()); - } else { - HiveStorageHandler sh = HiveUtils.getStorageHandler(getConf(), storageHandler); - String serDeClassName = sh.getSerDeClass().getName(); - serdeInfo.setSerializationLib(serDeClassName); - } - } - - serdeInfo.setParameters(new HashMap<String, String>()); - if (fieldDelim != null) { - serdeInfo.getParameters().put(FIELD_DELIM, fieldDelim); - serdeInfo.getParameters().put(SERIALIZATION_FORMAT, fieldDelim); - } - if (fieldEscape != null) { - serdeInfo.getParameters().put(ESCAPE_CHAR, fieldEscape); - } - if (collItemDelim != null) { - serdeInfo.getParameters().put(COLLECTION_DELIM, collItemDelim); - } - if (mapKeyDelim != null) { - serdeInfo.getParameters().put(MAPKEY_DELIM, mapKeyDelim); - } - if (lineDelim != null) { - serdeInfo.getParameters().put(LINE_DELIM, lineDelim); - } - - if (serdeProps != null) { - Iterator<Entry<String, String>> iter = serdeProps.entrySet() - .iterator(); - while (iter.hasNext()) { - Entry<String, String> m = iter.next(); - serdeInfo.getParameters().put(m.getKey(), m.getValue()); - } - } - - List<FieldSchema> indexTblCols = new ArrayList<FieldSchema>(); - List<Order> sortCols = new ArrayList<Order>(); - int k = 0; - Table metaBaseTbl = new Table(baseTbl); - // Even though we are storing these in metastore, get regular columns. Indexes on lengthy - // types from e.g. Avro schema will just fail to create the index table (by design). - List<FieldSchema> cols = metaBaseTbl.getCols(); - for (int i = 0; i < cols.size(); i++) { - FieldSchema col = cols.get(i); - if (indexedCols.contains(col.getName())) { - indexTblCols.add(col); - sortCols.add(new Order(col.getName(), 1)); - k++; - } - } - if (k != indexedCols.size()) { - throw new RuntimeException( - "Check the index columns, they should appear in the table being indexed."); - } - - int time = (int) (System.currentTimeMillis() / 1000); - org.apache.hadoop.hive.metastore.api.Table tt = null; - HiveIndexHandler indexHandler = HiveUtils.getIndexHandler(this.getConf(), indexHandlerClass); - - String itname = Utilities.getTableName(indexTblName); - if (indexHandler.usesIndexTable()) { - tt = new org.apache.hadoop.hive.ql.metadata.Table(idname, itname).getTTable(); - List<FieldSchema> partKeys = baseTbl.getPartitionKeys(); - tt.setPartitionKeys(partKeys); - tt.setTableType(TableType.INDEX_TABLE.toString()); - if (tblProps != null) { - for (Entry<String, String> prop : tblProps.entrySet()) { - tt.putToParameters(prop.getKey(), prop.getValue()); - } - } - SessionState ss = SessionState.get(); - CreateTableAutomaticGrant grants; - if (ss != null && ((grants = ss.getCreateTableGrants()) != null)) { - PrincipalPrivilegeSet principalPrivs = new PrincipalPrivilegeSet(); - principalPrivs.setUserPrivileges(grants.getUserGrants()); - principalPrivs.setGroupPrivileges(grants.getGroupGrants()); - principalPrivs.setRolePrivileges(grants.getRoleGrants()); - tt.setPrivileges(principalPrivs); - } - } - - if(!deferredRebuild) { - throw new RuntimeException("Please specify deferred rebuild using \" WITH DEFERRED REBUILD \"."); - } - - StorageDescriptor indexSd = new StorageDescriptor( - indexTblCols, - location, - inputFormat, - outputFormat, - false/*compressed - not used*/, - -1/*numBuckets - default is -1 when the table 
has no buckets*/, - serdeInfo, - null/*bucketCols*/, - sortCols, - null/*parameters*/); - - String ttname = Utilities.getTableName(tableName); - Index indexDesc = new Index(indexName, indexHandlerClass, tdname, ttname, time, time, itname, - indexSd, new HashMap<String,String>(), deferredRebuild); - if (indexComment != null) { - indexDesc.getParameters().put("comment", indexComment); - } - - if (idxProps != null) - { - indexDesc.getParameters().putAll(idxProps); - } - - indexHandler.analyzeIndexDefinition(baseTbl, indexDesc, tt); - - this.getMSC().createIndex(indexDesc, tt); - - } catch (Exception e) { - throw new HiveException(e); - } - } - - public Index getIndex(String baseTableName, String indexName) throws HiveException { - String[] names = Utilities.getDbTableName(baseTableName); - return this.getIndex(names[0], names[1], indexName); - } - - public Index getIndex(String dbName, String baseTableName, - String indexName) throws HiveException { - try { - return this.getMSC().getIndex(dbName, baseTableName, indexName); - } catch (Exception e) { - throw new HiveException(e); - } - } - - public boolean dropIndex(String baseTableName, String index_name, - boolean throwException, boolean deleteData) throws HiveException { - String[] names = Utilities.getDbTableName(baseTableName); - return dropIndex(names[0], names[1], index_name, throwException, deleteData); - } - - public boolean dropIndex(String db_name, String tbl_name, String index_name, - boolean throwException, boolean deleteData) throws HiveException { - try { - return getMSC().dropIndex(db_name, tbl_name, index_name, deleteData); - } catch (NoSuchObjectException e) { - if (throwException) { - throw new HiveException("Index " + index_name + " doesn't exist. ", e); - } - return false; - } catch (Exception e) { - throw new HiveException(e.getMessage(), e); - } - } - - /** * Drops table along with the data in it. If the table doesn't exist then it * is a no-op. If ifPurge option is specified it is passed to the * hdfs command that removes table data from warehouse to make it skip trash. @@ -1522,8 +1247,9 @@ public class Hive { */ public List<String> getTablesByType(String dbName, String pattern, TableType type) throws HiveException { - if (dbName == null) + if (dbName == null) { dbName = SessionState.get().getCurrentDatabase(); + } try { if (type != null) { @@ -2779,13 +2505,17 @@ private void constructOneLBLocationMap(FileStatus fSta, } addInsertNonDirectoryInformation(p, fileSystem, insertData); } - if (directories == null) return; + if (directories == null) { + return; + } // We don't expect any nesting in most cases, or a lot of it if it is present; union and LB // are some examples where we would have 1, or few, levels respectively. 
while (!directories.isEmpty()) { Path dir = directories.poll(); FileStatus[] contents = fileSystem.listStatus(dir); - if (contents == null) continue; + if (contents == null) { + continue; + } for (FileStatus status : contents) { if (status.isDirectory()) { directories.add(status.getPath()); @@ -3775,13 +3505,15 @@ private void constructOneLBLocationMap(FileStatus fSta, ErrorMsg errorMsg = ErrorMsg.getErrorMsg(e); - if (logMsg != null) + if (logMsg != null) { LOG.info(String.format(logMsg, e.getMessage())); + } - if (errorMsg != ErrorMsg.UNRESOLVED_RT_EXCEPTION) + if (errorMsg != ErrorMsg.UNRESOLVED_RT_EXCEPTION) { return new HiveException(e, e.getMessage(), errorMsg, hiveErrMsg); - else + } else { return new HiveException(msg, e); + } } /** @@ -3979,7 +3711,9 @@ private void constructOneLBLocationMap(FileStatus fSta, bucketDest.toUri().toString()); try { fs.rename(bucketSrc, bucketDest); - if (newFiles != null) newFiles.add(bucketDest); + if (newFiles != null) { + newFiles.add(bucketDest); + } } catch (Exception e) { throw getHiveException(e, msg); } @@ -4118,7 +3852,9 @@ private void constructOneLBLocationMap(FileStatus fSta, recycleDirToCmPath(path, purge); } FileStatus[] statuses = fs.listStatus(path, pathFilter); - if (statuses == null || statuses.length == 0) return; + if (statuses == null || statuses.length == 0) { + return; + } if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { String s = "Deleting files under " + path + " for replace: "; for (FileStatus file : statuses) { @@ -4342,17 +4078,6 @@ private void constructOneLBLocationMap(FileStatus fSta, } } - public List<Index> getIndexes(String dbName, String tblName, short max) throws HiveException { - List<Index> indexes = null; - try { - indexes = getMSC().listIndexes(dbName, tblName, max); - } catch (Exception e) { - LOG.error(StringUtils.stringifyException(e)); - throw new HiveException(e); - } - return indexes; - } - public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request) throws HiveException { try { return getMSC().setPartitionColumnStatistics(request); @@ -4440,6 +4165,7 @@ private void constructOneLBLocationMap(FileStatus fSta, /** * @deprecated use {@link #compact2(String, String, String, String, Map)} */ + @Deprecated public void compact(String dbname, String tableName, String partName, String compactType, Map<String, String> tblproperties) throws HiveException { compact2(dbname, tableName, partName, compactType, tblproperties); @@ -4461,9 +4187,13 @@ private void constructOneLBLocationMap(FileStatus fSta, throws HiveException { try { CompactionType cr = null; - if ("major".equals(compactType)) cr = CompactionType.MAJOR; - else if ("minor".equals(compactType)) cr = CompactionType.MINOR; - else throw new RuntimeException("Unknown compaction type " + compactType); + if ("major".equals(compactType)) { + cr = CompactionType.MAJOR; + } else if ("minor".equals(compactType)) { + cr = CompactionType.MINOR; + } else { + throw new RuntimeException("Unknown compaction type " + compactType); + } return getMSC().compact2(dbname, tableName, partName, cr, tblproperties); } catch (Exception e) { LOG.error(StringUtils.stringifyException(e));
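All of the index entry points deleted from Hive.java above (alterIndex, createIndex, getIndex, dropIndex, getIndexes) followed one facade pattern: resolve the possibly-qualified table name, delegate to the metastore client obtained from getMSC(), and rewrap Thrift-level failures as HiveException. A condensed sketch of that pattern, distilled from the dropIndex overloads removed in this hunk (this API no longer exists after this commit):

    public boolean dropIndex(String baseTableName, String indexName,
        boolean throwException, boolean deleteData) throws HiveException {
      // "db.table" (or a bare table name) resolves to {dbName, tableName}.
      String[] names = Utilities.getDbTableName(baseTableName);
      try {
        // Delegate to the metastore client; deleteData also removes index data.
        return getMSC().dropIndex(names[0], names[1], indexName, deleteData);
      } catch (NoSuchObjectException e) {
        // A missing index is tolerated unless the caller asked to be strict.
        if (throwException) {
          throw new HiveException("Index " + indexName + " doesn't exist. ", e);
        }
        return false;
      } catch (Exception e) {
        // Thrift/metastore errors surface to callers as HiveException.
        throw new HiveException(e.getMessage(), e);
      }
    }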
http://git-wip-us.apache.org/repos/asf/hive/blob/b0d3cb45/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java index 16c9834..dae18fb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java @@ -26,10 +26,8 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.tez.TezContext; -import org.apache.hadoop.hive.ql.index.HiveIndexHandler; import org.apache.hadoop.hive.ql.security.HadoopDefaultAuthenticator; import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider; import org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider; @@ -315,24 +313,6 @@ public final class HiveUtils { // prevent instantiation } - public static HiveIndexHandler getIndexHandler(HiveConf conf, - String indexHandlerClass) throws HiveException { - - if (indexHandlerClass == null) { - return null; - } - try { - Class<? extends HiveIndexHandler> handlerClass = - (Class<? extends HiveIndexHandler>) - Class.forName(indexHandlerClass, true, Utilities.getSessionSpecifiedClassLoader()); - HiveIndexHandler indexHandler = ReflectionUtils.newInstance(handlerClass, conf); - return indexHandler; - } catch (ClassNotFoundException e) { - throw new HiveException("Error in loading index handler." - + e.getMessage(), e); - } - } - @SuppressWarnings("unchecked") public static List<HiveMetastoreAuthorizationProvider> getMetaStoreAuthorizeProviderManagers( Configuration conf, HiveConf.ConfVars authorizationProviderConfKey, @@ -438,22 +418,6 @@ public final class HiveUtils { return ret; } - - /** - * Convert FieldSchemas to columnNames with backticks around them. 
- */ - public static String getUnparsedColumnNamesFromFieldSchema( - List<FieldSchema> fieldSchemas) { - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < fieldSchemas.size(); i++) { - if (i > 0) { - sb.append(","); - } - sb.append(HiveUtils.unparseIdentifier(fieldSchemas.get(i).getName())); - } - return sb.toString(); - } - public static String getLocalDirList(Configuration conf) { if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) { TezContext tezContext = (TezContext) TezContext.get(); http://git-wip-us.apache.org/repos/asf/hive/blob/b0d3cb45/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java index 3b87824..a5b6a4b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java @@ -32,7 +32,6 @@ import org.apache.hadoop.hive.metastore.api.Decimal; import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Index; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; @@ -42,8 +41,6 @@ import org.apache.hadoop.hive.metastore.api.WMPool; import org.apache.hadoop.hive.metastore.api.WMPoolTrigger; import org.apache.hadoop.hive.metastore.api.WMResourcePlan; import org.apache.hadoop.hive.metastore.api.WMTrigger; -import org.apache.hadoop.hive.ql.index.HiveIndex; -import org.apache.hadoop.hive.ql.index.HiveIndex.IndexType; import org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; @@ -55,7 +52,6 @@ import org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo.ForeignKeyCol; import org.apache.hadoop.hive.ql.metadata.NotNullConstraint; import org.apache.hadoop.hive.ql.plan.DescTableDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; -import org.apache.hadoop.hive.ql.plan.ShowIndexesDesc; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hive.common.util.HiveStringUtils; @@ -136,45 +132,6 @@ public final class MetaDataFormatUtils { return null; } - public static String getIndexInformation(Index index, boolean isOutputPadded) { - StringBuilder indexInfo = new StringBuilder(DEFAULT_STRINGBUILDER_SIZE); - - List<String> indexColumns = new ArrayList<String>(); - - indexColumns.add(index.getIndexName()); - indexColumns.add(index.getOrigTableName()); - - // index key names - List<FieldSchema> indexKeys = index.getSd().getCols(); - StringBuilder keyString = new StringBuilder(); - boolean first = true; - for (FieldSchema key : indexKeys) - { - if (!first) - { - keyString.append(", "); - } - keyString.append(key.getName()); - first = false; - } - - indexColumns.add(keyString.toString()); - - indexColumns.add(index.getIndexTableName()); - - // index type - String indexHandlerClass = index.getIndexHandlerClass(); - IndexType indexType = HiveIndex.getIndexTypeByClassName(indexHandlerClass); - indexColumns.add(indexType.getName()); 
- - String comment = HiveStringUtils.escapeJava(index.getParameters().get("comment")); - indexColumns.add(comment); - - formatOutput(indexColumns.toArray(new String[0]), indexInfo, isOutputPadded, true); - - return indexInfo.toString(); - } - public static String getConstraintsInformation(PrimaryKeyInfo pkInfo, ForeignKeyInfo fkInfo, UniqueConstraint ukInfo, NotNullConstraint nnInfo) { StringBuilder constraintsInfo = new StringBuilder(DEFAULT_STRINGBUILDER_SIZE); @@ -713,12 +670,6 @@ public final class MetaDataFormatUtils { return DescTableDesc.getSchema(showColStats).split("#")[0].split(","); } - public static String getIndexColumnsHeader() { - StringBuilder indexCols = new StringBuilder(DEFAULT_STRINGBUILDER_SIZE); - formatOutput(ShowIndexesDesc.getSchema().split("#")[0].split(","), indexCols); - return indexCols.toString(); - } - public static MetaDataFormatter getFormatter(HiveConf conf) { if ("json".equals(conf.get(HiveConf.ConfVars.HIVE_DDL_OUTPUT_FORMAT.varname, "text"))) { return new JsonMetaDataFormatter(); @@ -802,7 +753,9 @@ public final class MetaDataFormatUtils { if (p2.pool == null) { return (p1.pool == null) ? 0 : -1; } - if (p1.pool == null) return 1; + if (p1.pool == null) { + return 1; + } return Double.compare(p2.pool.getAllocFraction(), p1.pool.getAllocFraction()); }); for (PoolTreeNode child : children) { http://git-wip-us.apache.org/repos/asf/hive/blob/b0d3cb45/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java deleted file mode 100644 index 81952bf..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.optimizer; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Set; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.Index; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.exec.TableScanOperator; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.exec.TaskFactory; -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.hooks.ReadEntity; -import org.apache.hadoop.hive.ql.hooks.WriteEntity; -import org.apache.hadoop.hive.ql.index.IndexMetadataChangeTask; -import org.apache.hadoop.hive.ql.index.IndexMetadataChangeWork; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.optimizer.physical.index.IndexWhereProcessor; -import org.apache.hadoop.hive.ql.parse.ParseContext; -import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.ql.session.LineageState; - -/** - * Utility class for index support. - * Currently used for BITMAP and AGGREGATE index - * - */ -public final class IndexUtils { - - private static final Logger LOG = LoggerFactory.getLogger(IndexWhereProcessor.class.getName()); - - private IndexUtils(){ - } - - /** - * Check the partitions used by the table scan to make sure they also exist in the - * index table. - * @param pctx - * @param indexes - * @return partitions used by query. 
null if they do not exist in index table - * @throws HiveException - */ - public static Set<Partition> checkPartitionsCoveredByIndex(TableScanOperator tableScan, - ParseContext pctx, List<Index> indexes) throws HiveException { - Hive hive = Hive.get(pctx.getConf()); - // make sure each partition exists on the index table - PrunedPartitionList queryPartitionList = pctx.getOpToPartList().get(tableScan); - Set<Partition> queryPartitions = queryPartitionList.getPartitions(); - if (queryPartitions == null || queryPartitions.isEmpty()) { - return null; - } - - for (Partition part : queryPartitions) { - if (!containsPartition(hive, part, indexes)) { - return null; // problem if it doesn't contain the partition - } - } - - return queryPartitions; - } - - /** - * check that every index table contains the given partition and is fresh - */ - private static boolean containsPartition(Hive hive, Partition part, List<Index> indexes) - throws HiveException { - HashMap<String, String> partSpec = part.getSpec(); - if (partSpec.isEmpty()) { - // empty specs come from non-partitioned tables - return isIndexTableFresh(hive, indexes, part.getTable()); - } - - for (Index index : indexes) { - // index.getDbName() is used as a default database, which is database of target table, - // if index.getIndexTableName() does not contain database name - String[] qualified = Utilities.getDbTableName(index.getDbName(), index.getIndexTableName()); - Table indexTable = hive.getTable(qualified[0], qualified[1]); - // get partitions that match the spec - Partition matchingPartition = hive.getPartition(indexTable, partSpec, false); - if (matchingPartition == null) { - LOG.info("Index table " + indexTable + "did not contain built partition that matched " + partSpec); - return false; - } else if (!isIndexPartitionFresh(hive, index, part)) { - return false; - } - } - return true; - } - - /** - * Check the index partitions on a partitioned table exist and are fresh - */ - private static boolean isIndexPartitionFresh(Hive hive, Index index, - Partition part) throws HiveException { - LOG.info("checking index staleness..."); - try { - String indexTs = index.getParameters().get(part.getSpec().toString()); - if (indexTs == null) { - return false; - } - - FileSystem partFs = part.getDataLocation().getFileSystem(hive.getConf()); - FileStatus[] parts = partFs.listStatus(part.getDataLocation(), FileUtils.HIDDEN_FILES_PATH_FILTER); - for (FileStatus status : parts) { - if (status.getModificationTime() > Long.parseLong(indexTs)) { - LOG.info("Index is stale on partition '" + part.getName() - + "'. 
Modified time (" + status.getModificationTime() + ") for '" + status.getPath() - + "' is higher than index creation time (" + indexTs + ")."); - return false; - } - } - } catch (IOException e) { - throw new HiveException("Failed to grab timestamp information from partition '" + part.getName() + "': " + e.getMessage(), e); - } - return true; - } - - /** - * Check that the indexes on the un-partitioned table exist and are fresh - */ - private static boolean isIndexTableFresh(Hive hive, List<Index> indexes, Table src) - throws HiveException { - //check that they exist - if (indexes == null || indexes.size() == 0) { - return false; - } - //check that they are not stale - for (Index index : indexes) { - LOG.info("checking index staleness..."); - try { - String indexTs = index.getParameters().get("base_timestamp"); - if (indexTs == null) { - return false; - } - - FileSystem srcFs = src.getPath().getFileSystem(hive.getConf()); - FileStatus[] srcs = srcFs.listStatus(src.getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER); - for (FileStatus status : srcs) { - if (status.getModificationTime() > Long.parseLong(indexTs)) { - LOG.info("Index is stale on table '" + src.getTableName() - + "'. Modified time (" + status.getModificationTime() + ") for '" + status.getPath() - + "' is higher than index creation time (" + indexTs + ")."); - return false; - } - } - } catch (IOException e) { - throw new HiveException("Failed to grab timestamp information from table '" + src.getTableName() + "': " + e.getMessage(), e); - } - } - return true; - } - - - /** - * Get a list of indexes on a table that match given types. - */ - public static List<Index> getIndexes(Table baseTableMetaData, List<String> matchIndexTypes) - throws SemanticException { - List<Index> matchingIndexes = new ArrayList<Index>(); - - List<Index> indexesOnTable; - try { - indexesOnTable = getAllIndexes(baseTableMetaData, (short) -1); // get all indexes - } catch (HiveException e) { - throw new SemanticException("Error accessing metastore", e); - } - - for (Index index : indexesOnTable) { - String indexType = index.getIndexHandlerClass(); - if (matchIndexTypes.contains(indexType)) { - matchingIndexes.add(index); - } - } - return matchingIndexes; - } - - /** - * @return List containing Indexes names if there are indexes on this table - * @throws HiveException - **/ - public static List<Index> getAllIndexes(Table table, short max) throws HiveException { - Hive hive = Hive.get(); - return hive.getIndexes(table.getTTable().getDbName(), table.getTTable().getTableName(), max); - } - - public static Task<?> createRootTask( - HiveConf builderConf, - Set<ReadEntity> inputs, - Set<WriteEntity> outputs, - StringBuilder command, - LinkedHashMap<String, String> partSpec, - String indexTableName, - String dbName, - LineageState lineageState){ - // Don't try to index optimize the query to build the index - HiveConf.setBoolVar(builderConf, HiveConf.ConfVars.HIVEOPTINDEXFILTER, false); - Driver driver = new Driver(builderConf, SessionState.get().getUserName(), lineageState); - driver.compile(command.toString(), false); - - Task<?> rootTask = driver.getPlan().getRootTasks().get(0); - inputs.addAll(driver.getPlan().getInputs()); - outputs.addAll(driver.getPlan().getOutputs()); - - IndexMetadataChangeWork indexMetaChange = new IndexMetadataChangeWork(partSpec, - indexTableName, dbName); - IndexMetadataChangeTask indexMetaChangeTsk = - (IndexMetadataChangeTask) TaskFactory.get(indexMetaChange, builderConf); - indexMetaChangeTsk.setWork(indexMetaChange); - 
rootTask.addDependentTask(indexMetaChangeTsk); - - driver.destroy(); - - return rootTask; - } - - -} http://git-wip-us.apache.org/repos/asf/hive/blob/b0d3cb45/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index 4f6be6d..71f7380 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -26,7 +26,6 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc; import org.apache.hadoop.hive.ql.optimizer.correlation.CorrelationOptimizer; import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication; -import org.apache.hadoop.hive.ql.optimizer.index.RewriteGBUsingIndex; import org.apache.hadoop.hive.ql.optimizer.lineage.Generator; import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPruner; import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits; @@ -110,9 +109,9 @@ public class Optimizer { } if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION) && - !pctx.getContext().isCboSucceeded()) { - // We run constant propagation twice because after predicate pushdown, filter expressions - // are combined and may become eligible for reduction (like is not null filter). + !pctx.getContext().isCboSucceeded()) { + // We run constant propagation twice because after predicate pushdown, filter expressions + // are combined and may become eligible for reduction (like is not null filter). transformations.add(new ConstantPropagate()); } @@ -155,9 +154,6 @@ public class Optimizer { LOG.warn("Skew join is currently not supported in tez! Disabling the skew join optimization."); } } - if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTGBYUSINGINDEX)) { - transformations.add(new RewriteGBUsingIndex()); - } transformations.add(new SamplePruner()); MapJoinProcessor mapJoinProcessor = isSparkExecEngine ? 
new SparkMapJoinProcessor() http://git-wip-us.apache.org/repos/asf/hive/blob/b0d3cb45/ql/src/java/org/apache/hadoop/hive/ql/optimizer/QueryPlanPostProcessor.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/QueryPlanPostProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/QueryPlanPostProcessor.java index 5f0e842..c0ce684 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/QueryPlanPostProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/QueryPlanPostProcessor.java @@ -26,7 +26,6 @@ import org.apache.hadoop.hive.ql.exec.OperatorUtils; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.repl.ReplStateLogWork; import org.apache.hadoop.hive.ql.exec.repl.bootstrap.ReplLoadWork; -import org.apache.hadoop.hive.ql.index.IndexMetadataChangeWork; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.parse.GenTezProcContext; import org.apache.hadoop.hive.ql.parse.GenTezWork; @@ -102,7 +101,6 @@ public class QueryPlanPostProcessor { } else if(work instanceof ReplLoadWork || work instanceof ReplStateLogWork || - work instanceof IndexMetadataChangeWork || work instanceof GenTezWork || work instanceof GenSparkWork || work instanceof ArchiveWork || http://git-wip-us.apache.org/repos/asf/hive/blob/b0d3cb45/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java deleted file mode 100644 index 641d877..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.optimizer.index; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Stack; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.hive.ql.exec.FilterOperator; -import org.apache.hadoop.hive.ql.exec.GroupByOperator; -import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; -import org.apache.hadoop.hive.ql.exec.SelectOperator; -import org.apache.hadoop.hive.ql.exec.TableScanOperator; -import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; -import org.apache.hadoop.hive.ql.lib.Dispatcher; -import org.apache.hadoop.hive.ql.lib.GraphWalker; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker; -import org.apache.hadoop.hive.ql.lib.Rule; -import org.apache.hadoop.hive.ql.lib.RuleRegExp; -import org.apache.hadoop.hive.ql.parse.ParseContext; -import org.apache.hadoop.hive.ql.parse.SemanticException; - -/** - * RewriteCanApplyCtx class stores the context for the {@link RewriteCanApplyProcFactory} - * to determine if any index can be used and if the input query - * meets all the criteria for rewrite optimization. - */ -public final class RewriteCanApplyCtx implements NodeProcessorCtx { - - private static final Logger LOG = LoggerFactory.getLogger(RewriteCanApplyCtx.class.getName()); - - private RewriteCanApplyCtx(ParseContext parseContext) { - this.parseContext = parseContext; - } - - public static RewriteCanApplyCtx getInstance(ParseContext parseContext){ - return new RewriteCanApplyCtx(parseContext); - } - - // Rewrite Variables - private boolean selClauseColsFetchException = false; - private boolean queryHasGroupBy = false; - private boolean aggFuncIsNotCount = false; - private boolean aggParameterException = false; - - //The most important, indexKey - private String indexKey; - - private final ParseContext parseContext; - private String alias; - private String baseTableName; - private String indexTableName; - private String aggFunction; - - private TableScanOperator tableScanOperator; - private List<SelectOperator> selectOperators; - private List<GroupByOperator> groupByOperators; - - void resetCanApplyCtx(){ - setQueryHasGroupBy(false); - setAggFuncIsNotCount(false); - setSelClauseColsFetchException(false); - setBaseTableName(""); - setAggFunction(""); - setIndexKey(""); - } - - public boolean isQueryHasGroupBy() { - return queryHasGroupBy; - } - - public void setQueryHasGroupBy(boolean queryHasGroupBy) { - this.queryHasGroupBy = queryHasGroupBy; - } - - public boolean isAggFuncIsNotCount() { - return aggFuncIsNotCount; - } - - public void setAggFuncIsNotCount(boolean aggFuncIsNotCount) { - this.aggFuncIsNotCount = aggFuncIsNotCount; - } - - public Map<String, String> getBaseToIdxTableMap() { - return baseToIdxTableMap; - } - - public void setAggFunction(String aggFunction) { - this.aggFunction = aggFunction; - } - - public String getAggFunction() { - return aggFunction; - } - - public void setSelClauseColsFetchException(boolean selClauseColsFetchException) { - this.selClauseColsFetchException = selClauseColsFetchException; - } - - public boolean isSelClauseColsFetchException() { - return selClauseColsFetchException; - } - - public String getAlias() { - return alias; - } - - public void setAlias(String alias) { - this.alias = alias; - } - - public String 
getBaseTableName() { - return baseTableName; - } - - public void setBaseTableName(String baseTableName) { - this.baseTableName = baseTableName; - } - - public String getIndexTableName() { - return indexTableName; - } - - public void setIndexTableName(String indexTableName) { - this.indexTableName = indexTableName; - } - - public ParseContext getParseContext() { - return parseContext; - } - - /** - * This method walks all the nodes starting from topOp TableScanOperator node - * and invokes methods from {@link RewriteCanApplyProcFactory} for each of the rules - * added to the opRules map. We use the {@link PreOrderOnceWalker} for a pre-order - * traversal of the operator tree. - * - * The methods from {@link RewriteCanApplyProcFactory} set appropriate values in - * {@link RewriteVars} enum. - * - * @param topOp - * @throws SemanticException - */ - void populateRewriteVars(TableScanOperator topOp) - throws SemanticException{ - Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); - //^TS%[(SEL%)|(FIL%)]*GRY%[(FIL%)]*RS%[(FIL%)]*GRY% - opRules.put( - new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%[(" - + SelectOperator.getOperatorName() + "%)|(" + FilterOperator.getOperatorName() + "%)]*" - + GroupByOperator.getOperatorName() + "%[" + FilterOperator.getOperatorName() + "%]*" - + ReduceSinkOperator.getOperatorName() + "%[" + FilterOperator.getOperatorName() - + "%]*" + GroupByOperator.getOperatorName() + "%"), - RewriteCanApplyProcFactory.canApplyOnTableScanOperator(topOp)); - - // The dispatcher fires the processor corresponding to the closest matching - // rule and passes the context along - Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, this); - GraphWalker ogw = new PreOrderOnceWalker(disp); - - // Create a list of topop nodes - List<Node> topNodes = new ArrayList<Node>(); - topNodes.add(topOp); - - try { - ogw.startWalking(topNodes, null); - } catch (SemanticException e) { - LOG.error("Exception in walking operator tree. Rewrite variables not populated"); - LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); - throw new SemanticException(e.getMessage(), e); - } - } - - - /** - * Default procedure for {@link DefaultRuleDispatcher}. - * @return - */ - private NodeProcessor getDefaultProc() { - return new NodeProcessor() { - @Override - public Object process(Node nd, Stack<Node> stack, - NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { - return null; - } - }; - } - - - //Map for base table to index table mapping - //TableScan operator for base table will be modified to read from index table - private final Map<String, String> baseToIdxTableMap = new HashMap<String, String>();; - - public void addTable(String baseTableName, String indexTableName) { - baseToIdxTableMap.put(baseTableName, indexTableName); - } - - public String findBaseTable(String baseTableName) { - return baseToIdxTableMap.get(baseTableName); - } - - public String getIndexKey() { - return indexKey; - } - - public void setIndexKey(String indexKey) { - this.indexKey = indexKey; - } - - public TableScanOperator getTableScanOperator() { - return tableScanOperator; - } - - public void setTableScanOperator(TableScanOperator tableScanOperator) { - this.tableScanOperator = tableScanOperator; - } - - public List<SelectOperator> getSelectOperators() { - return selectOperators; - } - - public void setSelectOperators(List<SelectOperator> selectOperators) { - this.selectOperators = selectOperators; - } - - public List<GroupByOperator> getGroupByOperators() { - return groupByOperators; - } - - public void setGroupByOperators(List<GroupByOperator> groupByOperators) { - this.groupByOperators = groupByOperators; - } - - public void setAggParameterException(boolean aggParameterException) { - this.aggParameterException = aggParameterException; - } - - public boolean isAggParameterException() { - return aggParameterException; - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/b0d3cb45/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java deleted file mode 100644 index 41d2282..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.optimizer.index; - -import org.apache.hadoop.hive.ql.exec.GroupByOperator; -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.SelectOperator; -import org.apache.hadoop.hive.ql.exec.TableScanOperator; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.AggregationDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; -import org.apache.hadoop.hive.ql.plan.GroupByDesc; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; - -import java.util.ArrayList; -import java.util.List; -import java.util.Stack; - -/** - * Factory of methods used by {@link RewriteGBUsingIndex} - * to determine if the rewrite optimization can be applied to the input query. - * - */ -public final class RewriteCanApplyProcFactory { - public static CheckTableScanProc canApplyOnTableScanOperator(TableScanOperator topOp) { - return new CheckTableScanProc(); - } - - private static class CheckTableScanProc implements NodeProcessor { - public CheckTableScanProc() { - } - - public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) - throws SemanticException { - RewriteCanApplyCtx canApplyCtx = (RewriteCanApplyCtx) ctx; - for (Node node : stack) { - // For table scan operator, - // check ReferencedColumns to make sure that only the index column is - // selected for the following operators. - if (node instanceof TableScanOperator) { - TableScanOperator ts = (TableScanOperator) node; - canApplyCtx.setTableScanOperator(ts); - List<String> selectColumns = ts.getConf().getReferencedColumns(); - if (selectColumns == null || selectColumns.size() != 1) { - canApplyCtx.setSelClauseColsFetchException(true); - return null; - } else { - canApplyCtx.setIndexKey(selectColumns.get(0)); - } - } else if (node instanceof SelectOperator) { - // For select operators in the stack, we just add them - if (canApplyCtx.getSelectOperators() == null) { - canApplyCtx.setSelectOperators(new ArrayList<SelectOperator>()); - } - canApplyCtx.getSelectOperators().add((SelectOperator) node); - } else if (node instanceof GroupByOperator) { - if (canApplyCtx.getGroupByOperators() == null) { - canApplyCtx.setGroupByOperators(new ArrayList<GroupByOperator>()); - } - // According to the pre-order, - // the first GroupbyOperator is the one before RS - // and the second one is the one after RS - GroupByOperator operator = (GroupByOperator) node; - canApplyCtx.getGroupByOperators().add(operator); - if (!canApplyCtx.isQueryHasGroupBy()) { - canApplyCtx.setQueryHasGroupBy(true); - GroupByDesc conf = operator.getConf(); - List<AggregationDesc> aggrList = conf.getAggregators(); - if (aggrList == null || aggrList.size() != 1 - || !("count".equals(aggrList.get(0).getGenericUDAFName()))) { - // In the current implementation, we make sure that only count is - // in the function - canApplyCtx.setAggFuncIsNotCount(true); - return null; - } else { - List<ExprNodeDesc> para = aggrList.get(0).getParameters(); - if (para == null || para.size() == 0 || para.size() > 1) { - canApplyCtx.setAggParameterException(true); - return null; - } else { - ExprNodeDesc expr = ExprNodeDescUtils.backtrack(para.get(0), operator, - (Operator<OperatorDesc>) stack.get(0)); - if 
(!(expr instanceof ExprNodeColumnDesc)) { - canApplyCtx.setAggParameterException(true); - return null; - } - } - } - } - } - } - return null; - } - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/b0d3cb45/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java deleted file mode 100644 index 3cb176e..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.optimizer.index; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Index; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.ql.exec.TableScanOperator; -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.index.AggregateIndexHandler; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.optimizer.IndexUtils; -import org.apache.hadoop.hive.ql.optimizer.Transform; -import org.apache.hadoop.hive.ql.parse.ParseContext; -import org.apache.hadoop.hive.ql.parse.SemanticException; - - -/** - * RewriteGBUsingIndex is implemented as one of the Rule-based Optimizations. - * Implements optimizations for GroupBy clause rewrite using aggregate index. - * This optimization rewrites GroupBy query over base table to the query over simple table-scan - * over index table, if there is index on the group by key(s) or the distinct column(s). - * E.g. - * <code> - * select count(key) - * from table - * group by key; - * </code> - * to - * <code> - * select sum(_count_of_key) - * from idx_table - * group by key; - * </code> - * - * The rewrite supports following queries: - * <ul> - * <li> Queries having only those col refs that are in the index key. 
- * <li> Queries that have index key col refs - * <ul> - * <li> in SELECT - * <li> in WHERE - * <li> in GROUP BY - * </ul> - * <li> Queries with agg func COUNT(index key col ref) in SELECT - * <li> Queries with SELECT DISTINCT index_key_col_refs - * <li> Queries having a subquery satisfying above condition (only the subquery is rewritten) - * </ul> - * - * @see AggregateIndexHandler - * @see IndexUtils - * @see RewriteCanApplyCtx - * @see RewriteCanApplyProcFactory - * @see RewriteParseContextGenerator - * @see RewriteQueryUsingAggregateIndexCtx - * @see RewriteQueryUsingAggregateIndex - * For test cases, @see ql_rewrite_gbtoidx.q - */ - -public class RewriteGBUsingIndex extends Transform { - private ParseContext parseContext; - // Assumes one instance of this + single-threaded compilation for each query. - private Hive hiveDb; - private HiveConf hiveConf; - private static final Logger LOG = LoggerFactory.getLogger(RewriteGBUsingIndex.class.getName()); - - /* - * Stores the list of top TableScanOperator names for which the rewrite - * can be applied and the action that needs to be performed for operator tree - * starting from this TableScanOperator - */ - private final Map<String, RewriteCanApplyCtx> tsOpToProcess = - new LinkedHashMap<String, RewriteCanApplyCtx>(); - - //Index Validation Variables - private static final String IDX_BUCKET_COL = "_bucketname"; - private static final String IDX_OFFSETS_ARRAY_COL = "_offsets"; - - - @Override - public ParseContext transform(ParseContext pctx) throws SemanticException { - parseContext = pctx; - hiveConf = parseContext.getConf(); - try { - hiveDb = Hive.get(hiveConf); - } catch (HiveException e) { - LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); - throw new SemanticException(e.getMessage(), e); - } - - // Don't try to index optimize the query to build the index - HiveConf.setBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTINDEXFILTER, false); - - /* Check if the input query passes all the tests to be eligible for a rewrite - * If yes, rewrite original query; else, return the current parseContext - */ - if (shouldApplyOptimization()) { - LOG.info("Rewriting Original Query using " + getName() + " optimization."); - rewriteOriginalQuery(); - } - return parseContext; - } - - private String getName() { - return "RewriteGBUsingIndex"; - } - - /** - * We traverse the current operator tree to check for conditions in which the - * optimization cannot be applied. - * - * At the end, we check if all conditions have passed for rewrite. If yes, we - * determine if the index is usable for rewrite. Else, we log the condition which - * did not meet the rewrite criterion. - * - * @return - * @throws SemanticException - */ - boolean shouldApplyOptimization() throws SemanticException { - Map<Table, List<Index>> tableToIndex = getIndexesForRewrite(); - if (tableToIndex.isEmpty()) { - LOG.debug("No Valid Index Found to apply Rewrite, " + - "skipping " + getName() + " optimization"); - return false; - } - /* - * This code iterates over each TableScanOperator from the topOps map from ParseContext. - * For each operator tree originating from this top TableScanOperator, we determine - * if the optimization can be applied. If yes, we add the name of the top table to - * the tsOpToProcess to apply rewrite later on.
- * */ - for (Map.Entry<String, TableScanOperator> entry : parseContext.getTopOps().entrySet()) { - String alias = entry.getKey(); - TableScanOperator topOp = entry.getValue(); - Table table = topOp.getConf().getTableMetadata(); - List<Index> indexes = tableToIndex.get(table); - if (indexes.isEmpty()) { - continue; - } - if (table.isPartitioned()) { - //if base table has partitions, we need to check if index is built for - //all partitions. If not, then we do not apply the optimization - if (!checkIfIndexBuiltOnAllTablePartitions(topOp, indexes)) { - LOG.debug("Index is not built for all table partitions, " + - "skipping " + getName() + " optimization"); - continue; - } - } - //check if rewrite can be applied for operator tree - //if there are no partitions on base table - checkIfRewriteCanBeApplied(alias, topOp, table, indexes); - } - return !tsOpToProcess.isEmpty(); - } - - /** - * This methods checks if rewrite can be applied using the index and also - * verifies all conditions of the operator tree. - * - * @param topOp - TableScanOperator for a single the operator tree branch - * @param indexes - Map of a table and list of indexes on it - * @return - true if rewrite can be applied on the current branch; false otherwise - * @throws SemanticException - */ - private boolean checkIfRewriteCanBeApplied(String alias, TableScanOperator topOp, - Table baseTable, List<Index> indexes) throws SemanticException{ - //Context for checking if this optimization can be applied to the input query - RewriteCanApplyCtx canApplyCtx = RewriteCanApplyCtx.getInstance(parseContext); - canApplyCtx.setAlias(alias); - canApplyCtx.setBaseTableName(baseTable.getTableName()); - canApplyCtx.populateRewriteVars(topOp); - Map<Index, String> indexTableMap = getIndexToKeysMap(indexes); - for (Map.Entry<Index, String> entry : indexTableMap.entrySet()) { - //we rewrite the original query using the first valid index encountered - //this can be changed if we have a better mechanism to - //decide which index will produce a better rewrite - Index index = entry.getKey(); - String indexKeyName = entry.getValue(); - //break here if any valid index is found to apply rewrite - if (canApplyCtx.getIndexKey() != null && canApplyCtx.getIndexKey().equals(indexKeyName) - && checkIfAllRewriteCriteriaIsMet(canApplyCtx)) { - canApplyCtx.setAggFunction("_count_of_" + indexKeyName + ""); - canApplyCtx.addTable(canApplyCtx.getBaseTableName(), index.getIndexTableName()); - canApplyCtx.setIndexTableName(index.getIndexTableName()); - tsOpToProcess.put(alias, canApplyCtx); - return true; - } - } - return false; - } - - /** - * Get a list of indexes which can be used for rewrite. - * @return - * @throws SemanticException - */ - private Map<Table, List<Index>> getIndexesForRewrite() throws SemanticException{ - List<String> supportedIndexes = new ArrayList<String>(); - supportedIndexes.add(AggregateIndexHandler.class.getName()); - - // query the metastore to know what columns we have indexed - Collection<TableScanOperator> topTables = parseContext.getTopOps().values(); - Map<Table, List<Index>> indexes = new HashMap<Table, List<Index>>(); - for (TableScanOperator op : topTables) { - TableScanOperator tsOP = op; - List<Index> tblIndexes = IndexUtils.getIndexes(tsOP.getConf().getTableMetadata(), - supportedIndexes); - if (tblIndexes.size() > 0) { - indexes.put(tsOP.getConf().getTableMetadata(), tblIndexes); - } - } - - return indexes; - } - - /** - * This method checks if the index is built on all partitions of the base - * table. 
-
-  /**
-   * This method checks whether the rewrite can be applied using the index and
-   * verifies all conditions of the operator tree.
-   *
-   * @param alias - alias of the top TableScanOperator in the ParseContext
-   * @param topOp - TableScanOperator of a single operator tree branch
-   * @param baseTable - base table of the table scan
-   * @param indexes - list of indexes on the base table
-   * @return true if the rewrite can be applied on the current branch; false otherwise
-   * @throws SemanticException
-   */
-  private boolean checkIfRewriteCanBeApplied(String alias, TableScanOperator topOp,
-      Table baseTable, List<Index> indexes) throws SemanticException {
-    // Context for checking if this optimization can be applied to the input query
-    RewriteCanApplyCtx canApplyCtx = RewriteCanApplyCtx.getInstance(parseContext);
-    canApplyCtx.setAlias(alias);
-    canApplyCtx.setBaseTableName(baseTable.getTableName());
-    canApplyCtx.populateRewriteVars(topOp);
-    Map<Index, String> indexTableMap = getIndexToKeysMap(indexes);
-    for (Map.Entry<Index, String> entry : indexTableMap.entrySet()) {
-      // We rewrite the original query using the first valid index encountered.
-      // This can be changed if we get a better mechanism to decide which
-      // index will produce a better rewrite.
-      Index index = entry.getKey();
-      String indexKeyName = entry.getValue();
-      // Return here as soon as a valid index is found to apply the rewrite.
-      if (canApplyCtx.getIndexKey() != null && canApplyCtx.getIndexKey().equals(indexKeyName)
-          && checkIfAllRewriteCriteriaIsMet(canApplyCtx)) {
-        canApplyCtx.setAggFunction("_count_of_" + indexKeyName);
-        canApplyCtx.addTable(canApplyCtx.getBaseTableName(), index.getIndexTableName());
-        canApplyCtx.setIndexTableName(index.getIndexTableName());
-        tsOpToProcess.put(alias, canApplyCtx);
-        return true;
-      }
-    }
-    return false;
-  }
-
-  /**
-   * Get a list of indexes which can be used for the rewrite.
-   * @return map from each candidate table to its rewritable indexes
-   * @throws SemanticException
-   */
-  private Map<Table, List<Index>> getIndexesForRewrite() throws SemanticException {
-    List<String> supportedIndexes = new ArrayList<String>();
-    supportedIndexes.add(AggregateIndexHandler.class.getName());
-
-    // Query the metastore to know what columns we have indexed.
-    Collection<TableScanOperator> topTables = parseContext.getTopOps().values();
-    Map<Table, List<Index>> indexes = new HashMap<Table, List<Index>>();
-    for (TableScanOperator tsOp : topTables) {
-      List<Index> tblIndexes = IndexUtils.getIndexes(tsOp.getConf().getTableMetadata(),
-          supportedIndexes);
-      if (!tblIndexes.isEmpty()) {
-        indexes.put(tsOp.getConf().getTableMetadata(), tblIndexes);
-      }
-    }
-
-    return indexes;
-  }
-
-  /**
-   * This method checks if the index is built on all partitions of the base
-   * table. If not, the method returns false, as we do not apply the
-   * optimization in that case.
-   * @param tableScan - TableScanOperator of the base table
-   * @param indexes - list of indexes on the base table
-   * @return true if every partition read by the scan is covered by the index
-   * @throws SemanticException
-   */
-  private boolean checkIfIndexBuiltOnAllTablePartitions(TableScanOperator tableScan,
-      List<Index> indexes) throws SemanticException {
-    // Check if we have indexes on all partitions in this table scan.
-    Set<Partition> queryPartitions;
-    try {
-      queryPartitions = IndexUtils.checkPartitionsCoveredByIndex(tableScan, parseContext, indexes);
-      if (queryPartitions == null) { // partitions not covered
-        return false;
-      }
-    } catch (HiveException e) {
-      LOG.error("Fatal Error: problem accessing metastore", e);
-      throw new SemanticException(e);
-    }
-    return !queryPartitions.isEmpty();
-  }
-
-  /**
-   * This method iterates over the indexes on the table and populates the
-   * indexToKeysMap for all the indexes that satisfy the rewrite criteria.
-   * @param indexTables - candidate indexes on the base table
-   * @return map from each usable index to its single key column name
-   * @throws SemanticException
-   */
-  Map<Index, String> getIndexToKeysMap(List<Index> indexTables) throws SemanticException {
-    Map<Index, String> indexToKeysMap = new LinkedHashMap<Index, String>();
-    for (Index index : indexTables) {
-      // Get the index key columns.
-      StorageDescriptor sd = index.getSd();
-      List<FieldSchema> idxColList = sd.getCols();
-      assert idxColList.size() == 1;
-      String indexKeyName = idxColList.get(0).getName();
-      // Check that the index schema is as expected. This code block should
-      // catch problems of this rewrite breaking when the AggregateIndexHandler
-      // index is changed.
-      List<String> idxTblColNames = new ArrayList<String>();
-      try {
-        String[] qualified = Utilities.getDbTableName(index.getDbName(),
-            index.getIndexTableName());
-        Table idxTbl = hiveDb.getTable(qualified[0], qualified[1]);
-        for (FieldSchema idxTblCol : idxTbl.getCols()) {
-          idxTblColNames.add(idxTblCol.getName());
-        }
-      } catch (HiveException e) {
-        LOG.error("Got exception while locating index table, " +
-            "skipping " + getName() + " optimization");
-        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
-        throw new SemanticException(e.getMessage(), e);
-      }
-      assert idxTblColNames.contains(IDX_BUCKET_COL);
-      assert idxTblColNames.contains(IDX_OFFSETS_ARRAY_COL);
-      // We add all index tables which can be used for the rewrite
-      // and defer the decision of using a particular index for later;
-      // this allows choosing an index if a better mechanism is
-      // designed later to choose a better rewrite.
-      indexToKeysMap.put(index, indexKeyName);
-    }
-    return indexToKeysMap;
-  }
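The schema checks in getIndexToKeysMap pin down the shape an AggregateIndexHandler index table is expected to have. Roughly, for an index on a column named key (the key and count column names below depend on the indexed column; only the two constants are literal):

    // Expected columns of the aggregate index table (java.util.Arrays assumed imported):
    List<String> expectedCols = Arrays.asList(
        "key",            // the single indexed key column (per the assert above)
        "_bucketname",    // IDX_BUCKET_COL: name of the base-table bucket file
        "_offsets",       // IDX_OFFSETS_ARRAY_COL: row offsets within that file
        "_count_of_key"); // precomputed count consumed by the rewritten query
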
-
-  /**
-   * Method to rewrite the input query if all optimization criteria are met.
-   * The method iterates over the tsOpToProcess map to apply the rewrites.
-   * @throws SemanticException
-   */
-  private void rewriteOriginalQuery() throws SemanticException {
-    for (RewriteCanApplyCtx canApplyCtx : tsOpToProcess.values()) {
-      RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx =
-          RewriteQueryUsingAggregateIndexCtx.getInstance(parseContext, hiveDb, canApplyCtx);
-      rewriteQueryCtx.invokeRewriteQueryProc();
-      parseContext = rewriteQueryCtx.getParseContext();
-    }
-    LOG.info("Finished Rewriting query");
-  }
-
-  /**
-   * This method checks whether all rewrite criteria are met, and logs the
-   * reason when the rewrite optimization cannot be applied.
-   * @return true if all rewrite criteria are met; false otherwise
-   */
-  boolean checkIfAllRewriteCriteriaIsMet(RewriteCanApplyCtx canApplyCtx) {
-    if (canApplyCtx.isSelClauseColsFetchException()) {
-      LOG.debug("Got exception while locating child col refs for select list, " +
-          "skipping " + getName() + " optimization.");
-      return false;
-    }
-    if (canApplyCtx.isAggFuncIsNotCount()) {
-      LOG.debug("Agg func other than count is not supported by " + getName() +
-          " optimization.");
-      return false;
-    }
-    if (canApplyCtx.isAggParameterException()) {
-      LOG.debug("Got exception while locating parameter refs for aggregation, " +
-          "skipping " + getName() + " optimization.");
-      return false;
-    }
-    return true;
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/b0d3cb45/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java
deleted file mode 100644
index 2a01d29..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.optimizer.index;
-
-import java.io.IOException;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.Context;
-import org.apache.hadoop.hive.ql.QueryState;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.parse.ASTNode;
-import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
-import org.apache.hadoop.hive.ql.parse.ParseContext;
-import org.apache.hadoop.hive.ql.parse.ParseDriver;
-import org.apache.hadoop.hive.ql.parse.ParseException;
-import org.apache.hadoop.hive.ql.parse.ParseUtils;
-import org.apache.hadoop.hive.ql.parse.QB;
-import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
-import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-
-
-/**
- * RewriteParseContextGenerator is a class that offers methods to generate an operator tree
- * for input queries. It is implemented along the lines of the analyzeInternal(..) method
- * of {@link SemanticAnalyzer}, but it creates only the ParseContext for the input query command.
- * It does not optimize or generate map-reduce tasks for the input query.
- * This can be used when you need to create an operator tree for an internal query.
- */
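A hypothetical call site, to show how the class was meant to be used: hand it a QueryState and an internal query string, and get back the root operator of the analyzed (but unoptimized) plan. The queryState variable and the query text here are placeholders:

    // Sketch: generate an operator tree for an internal rewrite query.
    Operator<? extends OperatorDesc> root =
        RewriteParseContextGenerator.generateOperatorTree(queryState,
            "SELECT key, SUM(`_count_of_key`) FROM idx_tbl GROUP BY key");
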
-public final class RewriteParseContextGenerator {
-
-  private static final Logger LOG = LoggerFactory.getLogger(RewriteParseContextGenerator.class.getName());
-
-  /**
-   * Parse the input {@link String} command and generate an operator tree.
-   * @param queryState - current query state
-   * @param command - query string for which the operator tree is generated
-   * @throws SemanticException
-   */
-  public static Operator<? extends OperatorDesc> generateOperatorTree(QueryState queryState,
-      String command) throws SemanticException {
-    Operator<? extends OperatorDesc> operatorTree;
-    try {
-      Context ctx = new Context(queryState.getConf());
-      ASTNode tree = ParseUtils.parse(command, ctx);
-
-      BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, tree);
-      assert sem instanceof SemanticAnalyzer;
-      operatorTree = doSemanticAnalysis((SemanticAnalyzer) sem, tree, ctx);
-      LOG.info("Sub-query Semantic Analysis Completed");
-    } catch (IOException | ParseException | SemanticException e) {
-      LOG.error("Exception in generating the operator tree for input command - " + command, e);
-      throw new SemanticException(e.getMessage(), e);
-    }
-    return operatorTree;
-  }
-
-  /**
-   * For the input ASTNode tree, perform a semantic analysis and check metadata.
-   * Generate an operator tree and return it.
-   *
-   * @param sem - semantic analyzer for the sub-query
-   * @param ast - input AST
-   * @param ctx - compilation context
-   * @return root operator of the generated plan
-   * @throws SemanticException
-   */
-  private static Operator<?> doSemanticAnalysis(SemanticAnalyzer sem,
-      ASTNode ast, Context ctx) throws SemanticException {
-    QB qb = new QB(null, null, false);
-    ParseContext subPCtx = sem.getParseContext();
-    subPCtx.setContext(ctx);
-    sem.initParseCtx(subPCtx);
-
-    LOG.info("Starting Sub-query Semantic Analysis");
-    sem.doPhase1(ast, qb, sem.initPhase1Ctx(), null);
-    LOG.info("Completed phase 1 of Sub-query Semantic Analysis");
-
-    sem.getMetaData(qb);
-    LOG.info("Completed getting MetaData in Sub-query Semantic Analysis");
-
-    LOG.info("Sub-query Abstract syntax tree: " + ast.toStringTree());
-    Operator<?> operator = sem.genPlan(qb);
-
-    LOG.info("Sub-query Completed plan generation");
-    return operator;
-  }
-}
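One caveat worth noting in generateOperatorTree above: the assert on the analyzer type is a no-op unless the JVM runs with -ea, so a command that yields a different analyzer (a DDL statement, say) would surface as a ClassCastException rather than a clear error. A hard check would fail fast; a sketch, using the same names as the method above:

    BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, tree);
    if (!(sem instanceof SemanticAnalyzer)) {
      // Only plain query commands produce a SemanticAnalyzer here.
      throw new SemanticException("Unexpected analyzer type for command: " + command);
    }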