Repository: hive
Updated Branches:
  refs/heads/master 542eaf6bc -> 7df62023f
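In the hunks below, PartitionExpressionProxy loses the ORC-specific applySargToFileMetadata and instead resolves a FileFormatProxy per FileMetadataExprType. A minimal sketch of how a metastore-side caller could drive that surface to filter a cached footer with a SARG; the interface and method names are taken from the diffs, while the wrapper class and method here (FooterSargFilter, filterFooter) are hypothetical and not part of this commit:

```java
// Illustrative only: wires together the PartitionExpressionProxy methods added below
// (getMetadataType, getFileFormatProxy, createSarg) with FileFormatProxy.applySargToMetadata.
// FooterSargFilter itself is a hypothetical helper, not code from this commit.
import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.hive.metastore.FileFormatProxy;
import org.apache.hadoop.hive.metastore.PartitionExpressionProxy;
import org.apache.hadoop.hive.metastore.api.FileMetadataExprType;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;

public class FooterSargFilter {
  private final PartitionExpressionProxy expressionProxy;

  public FooterSargFilter(PartitionExpressionProxy expressionProxy) {
    this.expressionProxy = expressionProxy;
  }

  /**
   * Applies a serialized SARG to one cached footer; returns null when every stripe is
   * eliminated, mirroring OrcFileFormatProxy.applySargToMetadata in the diff below.
   */
  public ByteBuffer filterFooter(String inputFormatClass, byte[] serializedSarg,
      ByteBuffer cachedFooter) throws IOException {
    FileMetadataExprType type = expressionProxy.getMetadataType(inputFormatClass);
    if (type == null) {
      // Format has no metadata SARG support (getMetadataType returned null); keep the footer.
      return cachedFooter;
    }
    FileFormatProxy formatProxy = expressionProxy.getFileFormatProxy(type);
    SearchArgument sarg = expressionProxy.createSarg(serializedSarg);
    return formatProxy.applySargToMetadata(sarg, cachedFooter);
  }
}
```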
http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java ---------------------------------------------------------------------- diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java b/metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java index 9089d1c..1157033 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hive.metastore; -import java.nio.ByteBuffer; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -25,6 +24,7 @@ import java.util.List; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.FileMetadataExprType; import org.apache.hadoop.hive.metastore.api.Function; import org.apache.hadoop.hive.metastore.api.InvalidInputException; import org.apache.hadoop.hive.metastore.api.InvalidObjectException; @@ -71,12 +71,17 @@ public class TestObjectStore { } @Override + public FileMetadataExprType getMetadataType(String inputFormat) { + return null; + } + + @Override public SearchArgument createSarg(byte[] expr) { return null; } @Override - public ByteBuffer applySargToFileMetadata(SearchArgument sarg, ByteBuffer byteBuffer) { + public FileFormatProxy getFileFormatProxy(FileMetadataExprType type) { return null; } } http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/MockUtils.java ---------------------------------------------------------------------- diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/MockUtils.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/MockUtils.java index 983129a..784648a 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/MockUtils.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/MockUtils.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.metastore.hbase; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; @@ -29,7 +31,9 @@ import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.FileFormatProxy; import org.apache.hadoop.hive.metastore.PartitionExpressionProxy; +import org.apache.hadoop.hive.metastore.api.FileMetadataExprType; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; @@ -73,10 +77,14 @@ public class MockUtils { } @Override - public ByteBuffer applySargToFileMetadata(SearchArgument sarg, ByteBuffer byteBuffer) { + public FileMetadataExprType getMetadataType(String inputFormat) { return null; } + @Override + public FileFormatProxy getFileFormatProxy(FileMetadataExprType type) { + return null; + } } static HBaseStore init(Configuration conf, HTableInterface htable, http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java 
---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 290f489..30cae88 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -104,6 +104,7 @@ import org.apache.hadoop.hive.ql.plan.AlterTableDesc; import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes; import org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition; import org.apache.hadoop.hive.ql.plan.AlterTableSimpleDesc; +import org.apache.hadoop.hive.ql.plan.CacheMetadataDesc; import org.apache.hadoop.hive.ql.plan.CreateDatabaseDesc; import org.apache.hadoop.hive.ql.plan.CreateIndexDesc; import org.apache.hadoop.hive.ql.plan.CreateTableDesc; @@ -518,6 +519,11 @@ public class DDLTask extends Task<DDLWork> implements Serializable { if (alterTableExchangePartition != null) { return exchangeTablePartition(db, alterTableExchangePartition); } + + CacheMetadataDesc cacheMetadataDesc = work.getCacheMetadataDesc(); + if (cacheMetadataDesc != null) { + return cacheMetadata(db, cacheMetadataDesc); + } } catch (Throwable e) { failed(e); return 1; @@ -526,6 +532,12 @@ public class DDLTask extends Task<DDLWork> implements Serializable { return 0; } + private int cacheMetadata(Hive db, CacheMetadataDesc desc) throws HiveException { + db.cacheFileMetadata(desc.getDbName(), desc.getTableName(), + desc.getPartName(), desc.isAllParts()); + return 0; + } + private void failed(Throwable e) { while (e.getCause() != null && e.getClass() == RuntimeException.class) { e = e.getCause(); http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileFormatProxy.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileFormatProxy.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileFormatProxy.java new file mode 100644 index 0000000..ef76723 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileFormatProxy.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.orc; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.List; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.FileFormatProxy; +import org.apache.hadoop.hive.metastore.Metastore.SplitInfo; +import org.apache.hadoop.hive.metastore.Metastore.SplitInfos; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.orc.OrcProto; +import org.apache.orc.StripeInformation; + +/** File format proxy for ORC. */ +public class OrcFileFormatProxy implements FileFormatProxy { + + @Override + public ByteBuffer applySargToMetadata( + SearchArgument sarg, ByteBuffer byteBuffer) throws IOException { + // TODO: ideally we should store shortened representation of only the necessary fields + // in HBase; it will probably require custom SARG application code. + ReaderImpl.FooterInfo fi = ReaderImpl.extractMetaInfoFromFooter(byteBuffer, null); + OrcProto.Footer footer = fi.getFooter(); + int stripeCount = footer.getStripesCount(); + boolean[] result = OrcInputFormat.pickStripesViaTranslatedSarg( + sarg, fi.getFileMetaInfo().getWriterVersion(), + footer.getTypesList(), fi.getMetadata(), stripeCount); + // For ORC case, send the boundaries of the stripes so we don't have to send the footer. + SplitInfos.Builder sb = SplitInfos.newBuilder(); + List<StripeInformation> stripes = fi.getStripes(); + boolean isEliminated = true; + for (int i = 0; i < result.length; ++i) { + if (result != null && !result[i]) continue; + isEliminated = false; + StripeInformation si = stripes.get(i); + sb.addInfos(SplitInfo.newBuilder().setIndex(i) + .setOffset(si.getOffset()).setLength(si.getLength())); + } + return isEliminated ? null : ByteBuffer.wrap(sb.build().toByteArray()); + } + + public ByteBuffer[] getAddedColumnsToCache() { + return null; // Nothing so far. + } + + public ByteBuffer[][] getAddedValuesToCache(List<ByteBuffer> metadata) { + throw new UnsupportedOperationException(); // Nothing so far (and shouldn't be called). + } + + public ByteBuffer getMetadataToCache( + FileSystem fs, Path path, ByteBuffer[] addedVals) throws IOException { + // For now, there's nothing special to return in addedVals. Just return the footer. 
+ return OrcFile.createReader(fs, path).getSerializedFileFooter(); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index c682df2..29df4f9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -3401,4 +3401,17 @@ private void constructOneLBLocationMap(FileStatus fSta, throw new HiveException(e); } } + + public void cacheFileMetadata( + String dbName, String tableName, String partName, boolean allParts) throws HiveException { + try { + boolean willCache = getMSC().cacheFileMetadata(dbName, tableName, partName, allParts); + if (!willCache) { + throw new HiveException( + "Caching file metadata is not supported by metastore or for this file format"); + } + } catch (TException e) { + throw new HiveException(e); + } + } }; http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java index 7cddcc9..96910e3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java @@ -18,18 +18,16 @@ package org.apache.hadoop.hive.ql.optimizer.ppr; -import java.io.IOException; -import java.nio.ByteBuffer; +import org.apache.hadoop.hive.metastore.api.FileMetadataExprType; + import java.util.List; -import org.apache.hadoop.hive.metastore.Metastore.SplitInfo; -import org.apache.hadoop.hive.metastore.Metastore.SplitInfos; +import org.apache.hadoop.hive.metastore.FileFormatProxy; import org.apache.hadoop.hive.metastore.PartitionExpressionProxy; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; +import org.apache.hadoop.hive.ql.io.orc.OrcFileFormatProxy; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; -import org.apache.hadoop.hive.ql.io.orc.ReaderImpl; -import org.apache.orc.StripeInformation; import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -37,7 +35,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.orc.OrcProto; /** * The basic implementation of PartitionExpressionProxy that uses ql package classes. 
@@ -83,32 +80,29 @@ public class PartitionExpressionForMetastore implements PartitionExpressionProxy } @Override - public SearchArgument createSarg(byte[] expr) { - return ConvertAstToSearchArg.create(expr); + public FileFormatProxy getFileFormatProxy(FileMetadataExprType type) { + switch (type) { + case ORC_SARG: return new OrcFileFormatProxy(); + default: throw new RuntimeException("Unsupported format " + type); + } } @Override - public ByteBuffer applySargToFileMetadata( - SearchArgument sarg, ByteBuffer byteBuffer) throws IOException { - // TODO: ideally we should store shortened representation of only the necessary fields - // in HBase; it will probably require custom SARG application code. - ReaderImpl.FooterInfo fi = ReaderImpl.extractMetaInfoFromFooter(byteBuffer, null); - OrcProto.Footer footer = fi.getFooter(); - int stripeCount = footer.getStripesCount(); - boolean[] result = OrcInputFormat.pickStripesViaTranslatedSarg( - sarg, fi.getFileMetaInfo().getWriterVersion(), - footer.getTypesList(), fi.getMetadata(), stripeCount); - // For ORC case, send the boundaries of the stripes so we don't have to send the footer. - SplitInfos.Builder sb = SplitInfos.newBuilder(); - List<StripeInformation> stripes = fi.getStripes(); - boolean isEliminated = true; - for (int i = 0; i < result.length; ++i) { - if (result != null && !result[i]) continue; - isEliminated = false; - StripeInformation si = stripes.get(i); - sb.addInfos(SplitInfo.newBuilder().setIndex(i) - .setOffset(si.getOffset()).setLength(si.getLength())); + public FileMetadataExprType getMetadataType(String inputFormat) { + try { + Class<?> ifClass = Class.forName(inputFormat); + if (OrcInputFormat.class.isAssignableFrom(ifClass)) { + return FileMetadataExprType.ORC_SARG; + } + return null; + } catch (Throwable t) { + LOG.warn("Can't create the class for input format " + inputFormat, t); + return null; } - return isEliminated ? null : ByteBuffer.wrap(sb.build().toByteArray()); + } + + @Override + public SearchArgument createSarg(byte[] expr) { + return ConvertAstToSearchArg.create(expr); } } http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/ql/src/java/org/apache/hadoop/hive/ql/parse/AnalyzeCommandUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/AnalyzeCommandUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/AnalyzeCommandUtils.java new file mode 100644 index 0000000..ac1383c --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/AnalyzeCommandUtils.java @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.parse; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.session.SessionState; + +public class AnalyzeCommandUtils { + public static boolean isPartitionLevelStats(ASTNode tree) { + boolean isPartitioned = false; + ASTNode child = (ASTNode) tree.getChild(0); + if (child.getChildCount() > 1) { + child = (ASTNode) child.getChild(1); + if (child.getToken().getType() == HiveParser.TOK_PARTSPEC) { + isPartitioned = true; + } + } + return isPartitioned; + } + + public static Table getTable(ASTNode tree, BaseSemanticAnalyzer sa) throws SemanticException { + String tableName = ColumnStatsSemanticAnalyzer.getUnescapedName((ASTNode) tree.getChild(0).getChild(0)); + String currentDb = SessionState.get().getCurrentDatabase(); + String [] names = Utilities.getDbTableName(currentDb, tableName); + return sa.getTable(names[0], names[1], true); + } + + public static Map<String,String> getPartKeyValuePairsFromAST(Table tbl, ASTNode tree, + HiveConf hiveConf) throws SemanticException { + ASTNode child = ((ASTNode) tree.getChild(0).getChild(1)); + Map<String,String> partSpec = new HashMap<String, String>(); + if (child != null) { + partSpec = DDLSemanticAnalyzer.getValidatedPartSpec(tbl, child, hiveConf, false); + } //otherwise, it is the case of analyze table T compute statistics for columns; + return partSpec; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 832a5bc..1f30cbd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.parse; import java.io.IOException; -import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -83,35 +82,6 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer { return rwt; } - private boolean isPartitionLevelStats(ASTNode tree) { - boolean isPartitioned = false; - ASTNode child = (ASTNode) tree.getChild(0); - if (child.getChildCount() > 1) { - child = (ASTNode) child.getChild(1); - if (child.getToken().getType() == HiveParser.TOK_PARTSPEC) { - isPartitioned = true; - } - } - return isPartitioned; - } - - private Table getTable(ASTNode tree) throws SemanticException { - String tableName = getUnescapedName((ASTNode) tree.getChild(0).getChild(0)); - String currentDb = SessionState.get().getCurrentDatabase(); - String [] names = Utilities.getDbTableName(currentDb, tableName); - return getTable(names[0], names[1], true); - } - - private Map<String,String> getPartKeyValuePairsFromAST(Table tbl, ASTNode tree, - HiveConf hiveConf) throws SemanticException { - ASTNode child = ((ASTNode) tree.getChild(0).getChild(1)); - Map<String,String> partSpec = new HashMap<String, String>(); - if (child != null) { - partSpec = DDLSemanticAnalyzer.getValidatedPartSpec(tbl, child, hiveConf, false); - } //otherwise, it is the case of analyze table T compute statistics for columns; - return partSpec; 
- } - private List<String> getColumnName(ASTNode tree) throws SemanticException{ switch (tree.getChildCount()) { @@ -405,11 +375,11 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer { * an aggregation. */ if (shouldRewrite(ast)) { - tbl = getTable(ast); + tbl = AnalyzeCommandUtils.getTable(ast, this); colNames = getColumnName(ast); // Save away the original AST originalTree = ast; - boolean isPartitionStats = isPartitionLevelStats(ast); + boolean isPartitionStats = AnalyzeCommandUtils.isPartitionLevelStats(ast); Map<String,String> partSpec = null; checkForPartitionColumns( colNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys())); @@ -420,7 +390,7 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer { if (isPartitionStats) { isTableLevel = false; - partSpec = getPartKeyValuePairsFromAST(tbl, ast, conf); + partSpec = AnalyzeCommandUtils.getPartKeyValuePairsFromAST(tbl, ast, conf); handlePartialPartitionSpec(partSpec); } else { isTableLevel = true; http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index c407aae..5e6b606 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -26,6 +26,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -79,10 +80,12 @@ import org.apache.hadoop.hive.ql.plan.AlterTableDesc; import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes; import org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition; import org.apache.hadoop.hive.ql.plan.AlterTableSimpleDesc; +import org.apache.hadoop.hive.ql.plan.CacheMetadataDesc; import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; import org.apache.hadoop.hive.ql.plan.ColumnStatsUpdateWork; import org.apache.hadoop.hive.ql.plan.CreateDatabaseDesc; import org.apache.hadoop.hive.ql.plan.CreateIndexDesc; +import org.apache.hadoop.hive.ql.plan.DDLDesc; import org.apache.hadoop.hive.ql.plan.DDLWork; import org.apache.hadoop.hive.ql.plan.DescDatabaseDesc; import org.apache.hadoop.hive.ql.plan.DescFunctionDesc; @@ -492,7 +495,10 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer { case HiveParser.TOK_SHOW_SET_ROLE: analyzeSetShowRole(ast); break; - default: + case HiveParser.TOK_CACHE_METADATA: + analyzeCacheMetadata(ast); + break; + default: throw new SemanticException("Unsupported command."); } if (fetchTask != null && !rootTasks.isEmpty()) { @@ -500,6 +506,24 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer { } } + private void analyzeCacheMetadata(ASTNode ast) throws SemanticException { + Table tbl = AnalyzeCommandUtils.getTable(ast, this); + Map<String,String> partSpec = null; + CacheMetadataDesc desc; + // In 2 cases out of 3, we could pass the path and type directly to metastore... 
+ if (AnalyzeCommandUtils.isPartitionLevelStats(ast)) { + partSpec = AnalyzeCommandUtils.getPartKeyValuePairsFromAST(tbl, ast, conf); + Partition part = getPartition(tbl, partSpec, true); + desc = new CacheMetadataDesc(tbl.getDbName(), tbl.getTableName(), part.getName()); + inputs.add(new ReadEntity(part)); + } else { + // Should we get all partitions for a partitioned table? + desc = new CacheMetadataDesc(tbl.getDbName(), tbl.getTableName(), tbl.isPartitioned()); + inputs.add(new ReadEntity(tbl)); + } + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc), conf)); + } + private void analyzeAlterTableUpdateStats(ASTNode ast, String tblName, Map<String, String> partSpec) throws SemanticException { String colName = getUnescapedName((ASTNode) ast.getChild(0)); @@ -511,7 +535,6 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer { try { partName = Warehouse.makePartName(partSpec, false); } catch (MetaException e) { - // TODO Auto-generated catch block throw new SemanticException("partition " + partSpec.toString() + " not found"); } http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 1c72b1c..4c4470b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -312,6 +312,7 @@ KW_ISOLATION: 'ISOLATION'; KW_LEVEL: 'LEVEL'; KW_SNAPSHOT: 'SNAPSHOT'; KW_AUTOCOMMIT: 'AUTOCOMMIT'; +KW_CACHE: 'CACHE'; // Operators // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work. http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index d5051ce..5f14c6b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -364,6 +364,7 @@ TOK_TXN_READ_WRITE; TOK_COMMIT; TOK_ROLLBACK; TOK_SET_AUTOCOMMIT; +TOK_CACHE_METADATA; } @@ -1370,9 +1371,14 @@ descStatement analyzeStatement @init { pushMsg("analyze statement", state); } @after { popMsg(state); } - : KW_ANALYZE KW_TABLE (parttype=tableOrPartition) KW_COMPUTE KW_STATISTICS ((noscan=KW_NOSCAN) | (partialscan=KW_PARTIALSCAN) + : KW_ANALYZE KW_TABLE (parttype=tableOrPartition) + ( + (KW_COMPUTE) => KW_COMPUTE KW_STATISTICS ((noscan=KW_NOSCAN) | (partialscan=KW_PARTIALSCAN) | (KW_FOR KW_COLUMNS (statsColumnName=columnNameList)?))? -> ^(TOK_ANALYZE $parttype $noscan? $partialscan? KW_COLUMNS? $statsColumnName?) 
+ | + (KW_CACHE) => KW_CACHE KW_METADATA -> ^(TOK_CACHE_METADATA $parttype) + ) ; showStatement http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java index 0affe84..98860c6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java @@ -110,6 +110,7 @@ public final class SemanticAnalyzerFactory { commandType.put(HiveParser.TOK_DESCDATABASE, HiveOperation.DESCDATABASE); commandType.put(HiveParser.TOK_ALTERTABLE_SKEWED, HiveOperation.ALTERTABLE_SKEWED); commandType.put(HiveParser.TOK_ANALYZE, HiveOperation.ANALYZE_TABLE); + commandType.put(HiveParser.TOK_CACHE_METADATA, HiveOperation.CACHE_METADATA); commandType.put(HiveParser.TOK_ALTERTABLE_PARTCOLTYPE, HiveOperation.ALTERTABLE_PARTCOLTYPE); commandType.put(HiveParser.TOK_SHOW_COMPACTIONS, HiveOperation.SHOW_COMPACTIONS); commandType.put(HiveParser.TOK_SHOW_TRANSACTIONS, HiveOperation.SHOW_TRANSACTIONS); @@ -259,6 +260,7 @@ public final class SemanticAnalyzerFactory { case HiveParser.TOK_ALTERDATABASE_OWNER: case HiveParser.TOK_TRUNCATETABLE: case HiveParser.TOK_SHOW_SET_ROLE: + case HiveParser.TOK_CACHE_METADATA: return new DDLSemanticAnalyzer(conf); case HiveParser.TOK_CREATEFUNCTION: http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/ql/src/java/org/apache/hadoop/hive/ql/plan/CacheMetadataDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/CacheMetadataDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/CacheMetadataDesc.java new file mode 100644 index 0000000..1649b40 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/CacheMetadataDesc.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.plan; + + +@SuppressWarnings("serial") +public class CacheMetadataDesc extends DDLDesc { + private final String dbName, tableName, partName; + private final boolean isAllParts; + + public CacheMetadataDesc(String dbName, String tableName, String partName) { + this(dbName, tableName, partName, false); + } + + public CacheMetadataDesc(String dbName, String tableName, boolean isAllParts) { + this(dbName, tableName, null, isAllParts); + } + + private CacheMetadataDesc(String dbName, String tableName, String partName, boolean isAllParts) { + super(); + this.dbName = dbName; + this.tableName = tableName; + this.partName = partName; + this.isAllParts = isAllParts; + } + + public boolean isAllParts() { + return isAllParts; + } + + public String getPartName() { + return partName; + } + + public String getDbName() { + return dbName; + } + + public String getTableName() { + return tableName; + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java index a4c3db1..7bb818c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java @@ -91,6 +91,7 @@ public class DDLWork implements Serializable { */ protected HashSet<WriteEntity> outputs; private AlterTablePartMergeFilesDesc mergeFilesDesc; + private CacheMetadataDesc cacheMetadataDesc; public DDLWork() { } @@ -510,6 +511,12 @@ public class DDLWork implements Serializable { this.alterTableExchangePartition = alterTableExchangePartition; } + public DDLWork(HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs, + CacheMetadataDesc cacheMetadataDesc) { + this(inputs, outputs); + this.cacheMetadataDesc = cacheMetadataDesc; + } + /** * @return Create Database descriptor */ @@ -1140,6 +1147,13 @@ public class DDLWork implements Serializable { } /** + * @return information about the metadata to be cached + */ + public CacheMetadataDesc getCacheMetadataDesc() { + return this.cacheMetadataDesc; + } + + /** * @param alterTableExchangePartition * set the value of the table partition to be exchanged */ http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java index af7e43e..07134b3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java @@ -55,6 +55,7 @@ public enum HiveOperation { ALTERTABLE_CLUSTER_SORT("ALTERTABLE_CLUSTER_SORT", new Privilege[]{Privilege.ALTER_METADATA}, null), ANALYZE_TABLE("ANALYZE_TABLE", null, null), + CACHE_METADATA("CACHE_METADATA", new Privilege[]{Privilege.SELECT}, null), ALTERTABLE_BUCKETNUM("ALTERTABLE_BUCKETNUM", new Privilege[]{Privilege.ALTER_METADATA}, null), ALTERPARTITION_BUCKETNUM("ALTERPARTITION_BUCKETNUM", http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/ql/src/test/queries/clientpositive/stats_filemetadata.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/stats_filemetadata.q 
b/ql/src/test/queries/clientpositive/stats_filemetadata.q new file mode 100644 index 0000000..dc9f242 --- /dev/null +++ b/ql/src/test/queries/clientpositive/stats_filemetadata.q @@ -0,0 +1,17 @@ +set hive.mapred.mode=nonstrict; + +CREATE TABLE many_files(key string, value string) +partitioned by (ds string) +clustered by (key) into 4 buckets +stored as orc; + +insert overwrite table many_files partition (ds='1') select * from src; +insert overwrite table many_files partition (ds='2') select * from src; + +dfs -ls -R ${hiveconf:hive.metastore.warehouse.dir}/many_files/; + +analyze table many_files cache metadata; + +set hive.fetch.task.conversion=none; + +select sum(hash(*)) from many_files; http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/ql/src/test/results/clientpositive/tez/stats_filemetadata.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/stats_filemetadata.q.out b/ql/src/test/results/clientpositive/tez/stats_filemetadata.q.out new file mode 100644 index 0000000..a86e1c1 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/stats_filemetadata.q.out @@ -0,0 +1,54 @@ +PREHOOK: query: CREATE TABLE many_files(key string, value string) +partitioned by (ds string) +clustered by (key) into 4 buckets +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@many_files +POSTHOOK: query: CREATE TABLE many_files(key string, value string) +partitioned by (ds string) +clustered by (key) into 4 buckets +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@many_files +PREHOOK: query: insert overwrite table many_files partition (ds='1') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@many_files@ds=1 +POSTHOOK: query: insert overwrite table many_files partition (ds='1') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@many_files@ds=1 +POSTHOOK: Lineage: many_files PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: many_files PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table many_files partition (ds='2') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@many_files@ds=2 +POSTHOOK: query: insert overwrite table many_files partition (ds='2') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@many_files@ds=2 +POSTHOOK: Lineage: many_files PARTITION(ds=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: many_files PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +#### A masked pattern was here #### +PREHOOK: query: analyze table many_files cache metadata +PREHOOK: type: CACHE_METADATA +PREHOOK: Input: default@many_files +POSTHOOK: query: analyze table many_files cache metadata +POSTHOOK: type: CACHE_METADATA +POSTHOOK: Input: default@many_files +PREHOOK: query: select sum(hash(*)) from many_files +PREHOOK: type: QUERY +PREHOOK: Input: default@many_files +PREHOOK: Input: default@many_files@ds=1 +PREHOOK: Input: default@many_files@ds=2 +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from many_files +POSTHOOK: type: QUERY +POSTHOOK: Input: default@many_files +POSTHOOK: 
Input: default@many_files@ds=1 +POSTHOOK: Input: default@many_files@ds=2 +#### A masked pattern was here #### +73724366848 http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/service/src/gen/thrift/gen-py/hive_service/ThriftHive-remote ---------------------------------------------------------------------- diff --git a/service/src/gen/thrift/gen-py/hive_service/ThriftHive-remote b/service/src/gen/thrift/gen-py/hive_service/ThriftHive-remote index e167d5b..4bd7a5a 100755 --- a/service/src/gen/thrift/gen-py/hive_service/ThriftHive-remote +++ b/service/src/gen/thrift/gen-py/hive_service/ThriftHive-remote @@ -54,6 +54,7 @@ if len(sys.argv) <= 1 or sys.argv[1] == '--help': print(' void drop_table(string dbname, string name, bool deleteData)') print(' void drop_table_with_environment_context(string dbname, string name, bool deleteData, EnvironmentContext environment_context)') print(' get_tables(string db_name, string pattern)') + print(' get_table_meta(string db_patterns, string tbl_patterns, tbl_types)') print(' get_all_tables(string db_name)') print(' Table get_table(string dbname, string tbl_name)') print(' get_table_objects_by_name(string dbname, tbl_names)') @@ -77,6 +78,7 @@ if len(sys.argv) <= 1 or sys.argv[1] == '--help': print(' DropPartitionsResult drop_partitions_req(DropPartitionsRequest req)') print(' Partition get_partition(string db_name, string tbl_name, part_vals)') print(' Partition exchange_partition( partitionSpecs, string source_db, string source_table_name, string dest_db, string dest_table_name)') + print(' exchange_partitions( partitionSpecs, string source_db, string source_table_name, string dest_db, string dest_table_name)') print(' Partition get_partition_with_auth(string db_name, string tbl_name, part_vals, string user_name, group_names)') print(' Partition get_partition_by_name(string db_name, string tbl_name, string part_name)') print(' get_partitions(string db_name, string tbl_name, i16 max_parts)') @@ -162,6 +164,7 @@ if len(sys.argv) <= 1 or sys.argv[1] == '--help': print(' GetFileMetadataResult get_file_metadata(GetFileMetadataRequest req)') print(' PutFileMetadataResult put_file_metadata(PutFileMetadataRequest req)') print(' ClearFileMetadataResult clear_file_metadata(ClearFileMetadataRequest req)') + print(' CacheFileMetadataResult cache_file_metadata(CacheFileMetadataRequest req)') print(' string getName()') print(' string getVersion()') print(' fb_status getStatus()') @@ -411,6 +414,12 @@ elif cmd == 'get_tables': sys.exit(1) pp.pprint(client.get_tables(args[0],args[1],)) +elif cmd == 'get_table_meta': + if len(args) != 3: + print('get_table_meta requires 3 args') + sys.exit(1) + pp.pprint(client.get_table_meta(args[0],args[1],eval(args[2]),)) + elif cmd == 'get_all_tables': if len(args) != 1: print('get_all_tables requires 1 args') @@ -549,6 +558,12 @@ elif cmd == 'exchange_partition': sys.exit(1) pp.pprint(client.exchange_partition(eval(args[0]),args[1],args[2],args[3],args[4],)) +elif cmd == 'exchange_partitions': + if len(args) != 5: + print('exchange_partitions requires 5 args') + sys.exit(1) + pp.pprint(client.exchange_partitions(eval(args[0]),args[1],args[2],args[3],args[4],)) + elif cmd == 'get_partition_with_auth': if len(args) != 5: print('get_partition_with_auth requires 5 args') @@ -1059,6 +1074,12 @@ elif cmd == 'clear_file_metadata': sys.exit(1) pp.pprint(client.clear_file_metadata(eval(args[0]),)) +elif cmd == 'cache_file_metadata': + if len(args) != 1: + print('cache_file_metadata requires 1 args') + sys.exit(1) + 
pp.pprint(client.cache_file_metadata(eval(args[0]),)) + elif cmd == 'getName': if len(args) != 0: print('getName requires 0 args') http://git-wip-us.apache.org/repos/asf/hive/blob/7df62023/shims/common/src/main/java/org/apache/hadoop/hive/io/HdfsUtils.java ---------------------------------------------------------------------- diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/io/HdfsUtils.java b/shims/common/src/main/java/org/apache/hadoop/hive/io/HdfsUtils.java new file mode 100644 index 0000000..c90b34c --- /dev/null +++ b/shims/common/src/main/java/org/apache/hadoop/hive/io/HdfsUtils.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.io; + +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hive.shims.HadoopShims; +import org.apache.hadoop.hive.shims.ShimLoader; + +public class HdfsUtils { + private static final HadoopShims SHIMS = ShimLoader.getHadoopShims(); + private static final Log LOG = LogFactory.getLog(HdfsUtils.class); + + public static long getFileId(FileSystem fileSystem, Path path) throws IOException { + String pathStr = path.toUri().getPath(); + if (fileSystem instanceof DistributedFileSystem) { + return SHIMS.getFileId(fileSystem, pathStr); + } + // If we are not on DFS, we just hash the file name + size and hope for the best. + // TODO: we assume it only happens in tests. Fix? + int nameHash = pathStr.hashCode(); + long fileSize = fileSystem.getFileStatus(path).getLen(); + long id = ((fileSize ^ (fileSize >>> 32)) << 32) | ((long)nameHash & 0xffffffffL); + LOG.warn("Cannot get unique file ID from " + + fileSystem.getClass().getSimpleName() + "; using " + id + "(" + pathStr + + "," + nameHash + "," + fileSize + ")"); + return id; + } + + // TODO: this relies on HDFS not changing the format; we assume if we could get inode ID, this + // is still going to work. Otherwise, file IDs can be turned off. Later, we should use + // as public utility method in HDFS to obtain the inode-based path. + private static String HDFS_ID_PATH_PREFIX = "/.reserved/.inodes/"; + + public static Path getFileIdPath( + FileSystem fileSystem, Path path, long fileId) { + return (fileSystem instanceof DistributedFileSystem) + ? new Path(HDFS_ID_PATH_PREFIX + fileId) : path; + } +}
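The new HdfsUtils shim above exposes getFileId and getFileIdPath. A small usage sketch, assuming only those two static methods as shown in the diff; the command-line framing is illustrative:

```java
// Usage sketch for the HdfsUtils shim added above. Only HdfsUtils.getFileId and
// HdfsUtils.getFileIdPath come from this commit; the rest of this class is illustrative.
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.io.HdfsUtils;

public class FileIdExample {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path path = new Path(args[0]);
    FileSystem fs = path.getFileSystem(conf);

    // On HDFS this is the inode-based file ID from the shim; on other file systems it
    // falls back to the (name hash, length) composite built in getFileId, so it is a
    // best-effort key rather than a guaranteed-unique one.
    long fileId = HdfsUtils.getFileId(fs, path);

    // On HDFS the returned path points at /.reserved/.inodes/<fileId>, which stays valid
    // across renames; on other file systems the original path is returned unchanged.
    Path idPath = HdfsUtils.getFileIdPath(fs, path, fileId);
    System.out.println("fileId=" + fileId + " readPath=" + idPath);
  }
}
```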