okumin commented on code in PR #5886:
URL: https://github.com/apache/hive/pull/5886#discussion_r2177498637
##########
ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java:
##########
@@ -428,4 +285,24 @@ public void run() {
   public void enforceMutex(boolean enableMutex) {
     this.shouldUseMutex = enableMutex;
   }
+

Review Comment:
```suggestion
  @VisibleForTesting
```
nit
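For context, a small self-contained sketch of how the suggested annotation usually reads on a test-only setter like the one in the hunk above. The class name `MutexToggleExample` is hypothetical (it is not the actual `Initiator`), and the sketch assumes Guava's `com.google.common.annotations.VisibleForTesting` is on the classpath, as it is elsewhere in Hive; exact placement in the PR is up to the author.

```java
// Hypothetical example class; it only illustrates the suggested annotation.
// Assumes Guava (com.google.common.annotations.VisibleForTesting) is available.
import com.google.common.annotations.VisibleForTesting;

public class MutexToggleExample {
  private boolean shouldUseMutex = true;

  // The annotation documents that this setter exists for tests, not for
  // production callers; tools such as Error Prone can flag other usages.
  @VisibleForTesting
  public void enforceMutex(boolean enableMutex) {
    this.shouldUseMutex = enableMutex;
  }

  public boolean usesMutex() {
    return shouldUseMutex;
  }
}
```

The annotation is purely documentary (plus optional static-analysis checks) and does not change runtime behavior.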
false; + } else { + if (replIsCompactionDisabledForDatabase(ci.dbname)) { + skipDBs.add(ci.dbname); + LOG.info("Skipping {} as compaction is disabled due to repl; skipDBs::size:{}", + ci.dbname, skipDBs.size()); + return false; + } + } + + String qualifiedTableName = ci.getFullTableName(); + if (skipTables.contains(qualifiedTableName)) { + return false; + } + + LOG.info("Checking to see if we should compact {}", ci.getFullPartitionName()); + + // Check if we have already initiated or are working on a compaction for this table/partition. + // Also make sure we haven't exceeded configured number of consecutive failures. + // If any of the above applies, skip it. + // Note: if we are just waiting on cleaning we can still check, as it may be time to compact again even though we haven't cleaned. + if (foundCurrentOrFailedCompactions(currentCompactions, ci)) { + return false; + } + + Table t = metadataCache.computeIfAbsent(qualifiedTableName, () -> + CompactorUtil.resolveTable(conf, ci.dbname, ci.tableName)); + if (t == null) { + LOG.info("Can't find table {}, assuming it's a temp table or has been dropped and moving on.", + ci.getFullTableName()); Review Comment: ```suggestion qualifiedTableName); ``` nit ########## ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/TableOptimizer.java: ########## @@ -0,0 +1,219 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
##########
ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/TableOptimizer.java:
##########
@@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.txn.compactor;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
+import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
+import org.apache.hadoop.hive.metastore.txn.TxnStore;
+import org.apache.hadoop.hive.metastore.txn.entities.CompactionInfo;
+
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
+import org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils;
+import org.apache.thrift.TException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Comparator;
+import java.util.List;
+import java.util.LongSummaryStatistics;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+import static org.apache.hadoop.hive.metastore.HMSHandler.getMSForConf;
+import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDefaultCatalog;
+
+public abstract class TableOptimizer {
+  private static final String CLASS_NAME = TableOptimizer.class.getName();
+  private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
+
+  public abstract Set<CompactionInfo> findPotentialCompactions(long lastChecked, ShowCompactResponse currentCompactions,
+      Set<String> skipDBs, Set<String> skipTables) throws MetaException;
+
+  protected final HiveConf conf;
+  protected final TxnStore txnHandler;
+  protected final MetadataCache metadataCache;
+
+  protected TableOptimizer(HiveConf conf, TxnStore txnHandler, MetadataCache metadataCache) {
+    this.conf = conf;
+    this.txnHandler = txnHandler;
+    this.metadataCache = metadataCache;
+  }
+
+  protected boolean isEligibleForCompaction(CompactionInfo ci, ShowCompactResponse currentCompactions,
+      Set<String> skipDBs, Set<String> skipTables) {
+    try {
+      if (skipDBs.contains(ci.dbname)) {
+        LOG.info("Skipping {}::{}, skipDBs::size:{}", ci.dbname, ci.tableName, skipDBs.size());
+        return false;
+      } else {
+        if (replIsCompactionDisabledForDatabase(ci.dbname)) {
+          skipDBs.add(ci.dbname);
+          LOG.info("Skipping {} as compaction is disabled due to repl; skipDBs::size:{}",
+              ci.dbname, skipDBs.size());
+          return false;
+        }
+      }
+
+      String qualifiedTableName = ci.getFullTableName();
+      if (skipTables.contains(qualifiedTableName)) {
+        return false;
+      }
+
+      LOG.info("Checking to see if we should compact {}", ci.getFullPartitionName());
+
+      // Check if we have already initiated or are working on a compaction for this table/partition.
+      // Also make sure we haven't exceeded configured number of consecutive failures.
+      // If any of the above applies, skip it.
+      // Note: if we are just waiting on cleaning we can still check, as it may be time to compact again even though we haven't cleaned.
+      if (foundCurrentOrFailedCompactions(currentCompactions, ci)) {
+        return false;
+      }
+
+      Table t = metadataCache.computeIfAbsent(qualifiedTableName, () ->
+          CompactorUtil.resolveTable(conf, ci.dbname, ci.tableName));
+      if (t == null) {
+        LOG.info("Can't find table {}, assuming it's a temp table or has been dropped and moving on.",
+            ci.getFullTableName());
+        return false;
+      }
+
+      if (replIsCompactionDisabledForTable(t)) {
+        skipTables.add(ci.getFullTableName());

Review Comment:
```suggestion
        skipTables.add(qualifiedTableName);
```
nit

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org