This is an automated email from the ASF dual-hosted git repository. vgarg pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new b7650eb HIVE-22366: Multiple metastore calls for same table and constraints during planning (Vineet Garg, reviewed by Jesus Camacho Rodriguez) b7650eb is described below commit b7650eb782f3cf6fb3d8dd46356dd8d230412332 Author: Vineet Garg <vg...@apache.org> AuthorDate: Mon Jan 27 10:45:14 2020 -0800 HIVE-22366: Multiple metastore calls for same table and constraints during planning (Vineet Garg, reviewed by Jesus Camacho Rodriguez) --- .../org/apache/hadoop/hive/ql/metadata/Table.java | 249 +++++++++++++++------ .../hadoop/hive/ql/parse/CalcitePlanner.java | 23 +- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 10 +- .../apache/hadoop/hive/ql/stats/StatsUtils.java | 2 +- .../hadoop/hive/metastore/HiveMetaStoreClient.java | 6 + 5 files changed, 201 insertions(+), 89 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 33a4505..b9bb3ab 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -121,6 +121,16 @@ public class Table implements Serializable { private transient DefaultConstraint dc; private transient CheckConstraint cc; + /** Constraint related flags + * This is to track if constraints are retrieved from metastore or not + */ + private transient boolean isPKFetched=false; + private transient boolean isFKFetched=false; + private transient boolean isUniqueFetched=false; + private transient boolean isNotNullFetched=false; + private transient boolean isDefaultFetched=false; + private transient boolean isCheckFetched=false; + /** * Used only for serialization. */ @@ -147,6 +157,19 @@ public class Table implements Serializable { this(getEmptyTable(databaseName, tableName)); } + /** This api is used by getMetaData which require deep copy of metastore.api.table + * and constraints copy + */ + public Table makeCopy() { + + // make deep copy of metastore.api.table + Table newTab = new Table(this.getTTable().deepCopy()); + + // copy constraints + newTab.copyConstraints(this); + return newTab; + } + public boolean isDummyTable() { return tTable.getTableName().equals(SemanticAnalyzer.DUMMY_TABLE); } @@ -171,7 +194,7 @@ public class Table implements Serializable { * Initialize an empty table. */ public static org.apache.hadoop.hive.metastore.api.Table - getEmptyTable(String databaseName, String tableName) { + getEmptyTable(String databaseName, String tableName) { StorageDescriptor sd = new StorageDescriptor(); { sd.setSerdeInfo(new SerDeInfo()); @@ -316,8 +339,8 @@ public class Table implements Serializable { try { storageHandler = HiveUtils.getStorageHandler( SessionState.getSessionConf(), - getProperty( - org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE)); + getProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE)); } catch (Exception e) { throw new RuntimeException(e); } @@ -343,7 +366,7 @@ public class Table implements Serializable { inputFormatClass = getStorageHandler().getInputFormatClass(); } else { inputFormatClass = (Class<? extends InputFormat>) - Class.forName(className, true, Utilities.getSessionSpecifiedClassLoader()); + Class.forName(className, true, Utilities.getSessionSpecifiedClassLoader()); } } catch (ClassNotFoundException e) { throw new RuntimeException(e); @@ -449,12 +472,12 @@ public class Table implements Serializable { } public void setTableType(TableType tableType) { - tTable.setTableType(tableType.toString()); - } + tTable.setTableType(tableType.toString()); + } public TableType getTableType() { - return Enum.valueOf(TableType.class, tTable.getTableType()); - } + return Enum.valueOf(TableType.class, tTable.getTableType()); + } public ArrayList<StructField> getFields() { @@ -491,46 +514,46 @@ public class Table implements Serializable { getProperty(hive_metastoreConstants.TABLE_BUCKETING_VERSION)); } - @Override + @Override public String toString() { return tTable.getTableName(); } - /* (non-Javadoc) - * @see java.lang.Object#hashCode() - */ - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((tTable == null) ? 0 : tTable.hashCode()); - return result; - } - - /* (non-Javadoc) - * @see java.lang.Object#equals(java.lang.Object) - */ - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (getClass() != obj.getClass()) { - return false; - } - Table other = (Table) obj; - if (tTable == null) { - if (other.tTable != null) { - return false; - } - } else if (!tTable.equals(other.tTable)) { - return false; - } - return true; - } + /* (non-Javadoc) + * @see java.lang.Object#hashCode() + */ + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((tTable == null) ? 0 : tTable.hashCode()); + return result; + } + + /* (non-Javadoc) + * @see java.lang.Object#equals(java.lang.Object) + */ + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + Table other = (Table) obj; + if (tTable == null) { + if (other.tTable != null) { + return false; + } + } else if (!tTable.equals(other.tTable)) { + return false; + } + return true; + } public List<FieldSchema> getPartCols() { @@ -596,7 +619,7 @@ public class Table implements Serializable { for (String col : bucketCols) { if (!isField(col)) { throw new HiveException("Bucket columns " + col - + " is not part of the table columns (" + getCols() ); + + " is not part of the table columns (" + getCols()); } } tTable.getSd().setBucketCols(bucketCols); @@ -619,7 +642,7 @@ public class Table implements Serializable { mappings.put(valList, dirName); } - public Map<List<String>,String> getSkewedColValueLocationMaps() { + public Map<List<String>, String> getSkewedColValueLocationMaps() { return (tTable.getSd().getSkewedInfo() != null) ? tTable.getSd().getSkewedInfo() .getSkewedColValueLocationMaps() : new HashMap<List<String>, String>(); } @@ -977,8 +1000,8 @@ public class Table implements Serializable { public boolean isNonNative() { return getProperty( - org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE) - != null; + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE) + != null; } public String getFullyQualifiedName() { @@ -1057,7 +1080,7 @@ public class Table implements Serializable { try { Class<?> clazz = conf.getClassByName(serdeLib); if (!AbstractSerDe.class.isAssignableFrom(clazz)) - { + { return true; // The default. } deserializer = ReflectionUtil.newInstance( @@ -1126,71 +1149,167 @@ public class Table implements Serializable { return outdatedForRewritingMaterializedView; } - /* These are only populated during optimization and describing */ + public ColumnStatistics getColStats() { + return tTable.isSetColStats() ? tTable.getColStats() : null; + } + + /** + * Setup the table level stats as if the table is new. Used when setting up Table for a new + * table or during replication. + */ + public void setStatsStateLikeNewTable() { + if (isPartitioned()) { + StatsSetupConst.setStatsStateForCreateTable(getParameters(), null, + StatsSetupConst.FALSE); + } else { + StatsSetupConst.setStatsStateForCreateTable(getParameters(), + MetaStoreUtils.getColumnNames(getCols()), StatsSetupConst.TRUE); + } + } + + /** Constraints related methods + * Note that set apis are used by DESCRIBE only, although get apis return RELY or ENABLE + * constraints DESCRIBE could set all type of constraints + * */ + + /* This only return PK which are created with RELY */ public PrimaryKeyInfo getPrimaryKeyInfo() { + if(!this.isPKFetched) { + try { + pki = Hive.get().getReliablePrimaryKeys(this.getDbName(), this.getTableName()); + this.isPKFetched = true; + } catch (HiveException e) { + LOG.warn("Cannot retrieve PK info for table : " + this.getTableName() + + " ignoring exception: " + e); + } + } return pki; } public void setPrimaryKeyInfo(PrimaryKeyInfo pki) { this.pki = pki; + this.isPKFetched = true; } + /* This only return FK constraints which are created with RELY */ public ForeignKeyInfo getForeignKeyInfo() { + if(!isFKFetched) { + try { + fki = Hive.get().getReliableForeignKeys(this.getDbName(), this.getTableName()); + this.isFKFetched = true; + } catch (HiveException e) { + LOG.warn( + "Cannot retrieve FK info for table : " + this.getTableName() + + " ignoring exception: " + e); + } + } return fki; } public void setForeignKeyInfo(ForeignKeyInfo fki) { this.fki = fki; + this.isFKFetched = true; } + /* This only return UNIQUE constraint defined with RELY */ public UniqueConstraint getUniqueKeyInfo() { + if(!isUniqueFetched) { + try { + uki = Hive.get().getReliableUniqueConstraints(this.getDbName(), this.getTableName()); + this.isUniqueFetched = true; + } catch (HiveException e) { + LOG.warn( + "Cannot retrieve Unique Key info for table : " + this.getTableName() + + " ignoring exception: " + e); + } + } return uki; } public void setUniqueKeyInfo(UniqueConstraint uki) { this.uki = uki; + this.isUniqueFetched = true; } + /* This only return NOT NULL constraint defined with RELY */ public NotNullConstraint getNotNullConstraint() { + if(!isNotNullFetched) { + try { + nnc = Hive.get().getReliableNotNullConstraints(this.getDbName(), this.getTableName()); + this.isNotNullFetched = true; + } catch (HiveException e) { + LOG.warn("Cannot retrieve Not Null constraint info for table : " + + this.getTableName() + " ignoring exception: " + e); + } + } return nnc; } public void setNotNullConstraint(NotNullConstraint nnc) { this.nnc = nnc; + this.isNotNullFetched = true; } + /* This only return DEFAULT constraint defined with ENABLE */ public DefaultConstraint getDefaultConstraint() { + if(!isDefaultFetched) { + try { + dc = Hive.get().getEnabledDefaultConstraints(this.getDbName(), this.getTableName()); + this.isDefaultFetched = true; + } catch (HiveException e) { + LOG.warn("Cannot retrieve Default constraint info for table : " + + this.getTableName() + " ignoring exception: " + e); + } + } return dc; } public void setDefaultConstraint(DefaultConstraint dc) { this.dc = dc; + this.isDefaultFetched = true; } + /* This only return CHECK constraint defined with ENABLE */ public CheckConstraint getCheckConstraint() { + if(!isCheckFetched) { + try{ + cc = Hive.get().getEnabledCheckConstraints(this.getDbName(), this.getTableName()); + this.isCheckFetched = true; + } catch (HiveException e) { + LOG.warn("Cannot retrieve Check constraint info for table : " + + this.getTableName() + " ignoring exception: " + e); + } + } return cc; } public void setCheckConstraint(CheckConstraint cc) { this.cc = cc; + this.isCheckFetched = true; } + /** This shouldn't use get apis because those api call metastore + * to fetch constraints. + * getMetaData only need to make a copy of existing constraints, even if those are not fetched + */ + public void copyConstraints(final Table tbl) { + this.pki = tbl.pki; + this.isPKFetched = tbl.isPKFetched; - public ColumnStatistics getColStats() { - return tTable.isSetColStats() ? tTable.getColStats() : null; - } + this.fki = tbl.fki; + this.isFKFetched = tbl.isFKFetched; - /** - * Setup the table level stats as if the table is new. Used when setting up Table for a new - * table or during replication. - */ - public void setStatsStateLikeNewTable() { - if (isPartitioned()) { - StatsSetupConst.setStatsStateForCreateTable(getParameters(), null, - StatsSetupConst.FALSE); - } else { - StatsSetupConst.setStatsStateForCreateTable(getParameters(), - MetaStoreUtils.getColumnNames(getCols()), StatsSetupConst.TRUE); - } + this.uki = tbl.uki; + this.isUniqueFetched = tbl.isUniqueFetched; + + this.nnc = tbl.nnc; + this.isNotNullFetched = tbl.isNotNullFetched; + + this.dc = tbl.dc; + this.isDefaultFetched = tbl.isDefaultFetched; + + this.cc = tbl.cc; + this.isCheckFetched = tbl.isCheckFetched; } + }; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 773c5ca..7680ede 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -5225,24 +5225,19 @@ public class CalcitePlanner extends SemanticAnalyzer { } @Override - protected Table getTableObjectByName(String tableName, boolean throwException) throws HiveException { - if (!tabNameToTabObject.containsKey(tableName)) { - // TODO: The code below should be a single HMS call and possibly unified with method in SemanticAnalyzer - Table table = db.getTable(tableName, throwException); + protected Table getTableObjectByName(String tabName, boolean throwException) throws HiveException { + String[] names = Utilities.getDbTableName(tabName); + final String tableName = names[1]; + final String dbName = names[0]; + final String fullyQualName = dbName + "." + tableName; + if (!tabNameToTabObject.containsKey(fullyQualName)) { + Table table = db.getTable(dbName, tableName, throwException); if (table != null) { - table.setPrimaryKeyInfo(db.getReliablePrimaryKeys( - table.getDbName(), table.getTableName())); - table.setForeignKeyInfo(db.getReliableForeignKeys( - table.getDbName(), table.getTableName())); - table.setUniqueKeyInfo(db.getReliableUniqueConstraints( - table.getDbName(), table.getTableName())); - table.setNotNullConstraint(db.getReliableNotNullConstraints( - table.getDbName(), table.getTableName())); - tabNameToTabObject.put(tableName, table); + tabNameToTabObject.put(fullyQualName, table); } return table; } - return tabNameToTabObject.get(tableName); + return tabNameToTabObject.get(fullyQualName); } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index dd23a90..4afd454 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -490,7 +490,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } else { mergeIsDirect = false; } - tabNameToTabObject.clear(); loadTableWork.clear(); loadFileWork.clear(); columnStatsAutoGatherContexts.clear(); @@ -2142,14 +2141,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { // Get table details from tabNameToTabObject cache Table tab = getTableObjectByName(tabName, false); if (tab != null) { - // copy table object in case downstream changes it - Table newTab = new Table(tab.getTTable().deepCopy()); - // copy constraints, we do not need deep copy as - // they should not be changed - newTab.setPrimaryKeyInfo(tab.getPrimaryKeyInfo()); - newTab.setForeignKeyInfo(tab.getForeignKeyInfo()); - newTab.setUniqueKeyInfo(tab.getUniqueKeyInfo()); - newTab.setNotNullConstraint(tab.getNotNullConstraint()); + Table newTab = tab.makeCopy(); tab = newTab; } if (tab == null || diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 8084dcd..cb2d0a7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -1024,7 +1024,7 @@ public class StatsUtils { // This table is dummy and has no stats return stats; } - if (fetchColStats) { + if (fetchColStats && !colStatsToRetrieve.isEmpty()) { try { List<ColumnStatisticsObj> colStat = Hive.get().getTableColumnStatistics( dbName, tabName, colStatsToRetrieve, false); diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index 8220f10..c7e1044 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -2565,6 +2565,9 @@ public class HiveMetaStoreClient implements IMetaStoreClient, AutoCloseable { @Override public List<ColumnStatisticsObj> getTableColumnStatistics(String catName, String dbName, String tableName, List<String> colNames, String engine) throws TException { + if(colNames.isEmpty()) { + return Collections.emptyList(); + } TableStatsRequest rqst = new TableStatsRequest(dbName, tableName, colNames, engine); rqst.setCatName(catName); rqst.setEngine(engine); @@ -2581,6 +2584,9 @@ public class HiveMetaStoreClient implements IMetaStoreClient, AutoCloseable { @Override public List<ColumnStatisticsObj> getTableColumnStatistics(String catName, String dbName, String tableName, List<String> colNames, String engine, String validWriteIdList) throws TException { + if(colNames.isEmpty()) { + return Collections.emptyList(); + } TableStatsRequest rqst = new TableStatsRequest(dbName, tableName, colNames, engine); rqst.setCatName(catName); rqst.setEngine(engine);