[ https://issues.apache.org/jira/browse/HIVE-13353?focusedWorklogId=814653&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-814653 ]
ASF GitHub Bot logged work on HIVE-13353: ----------------------------------------- Author: ASF GitHub Bot Created on: 07/Oct/22 12:21 Start Date: 07/Oct/22 12:21 Worklog Time Spent: 10m Work Description: veghlaci05 commented on code in PR #3608: URL: https://github.com/apache/hive/pull/3608#discussion_r989978699 ########## standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java: ########## @@ -3970,6 +3960,78 @@ public ShowCompactResponse showCompact(ShowCompactRequest rqst) throws MetaExcep } } + private List<String> getShowCompactionQueryParamList(ShowCompactRequest request) throws MetaException { + String dbName = request.getDbname(); + String tableName = request.getTablename(); + String partName = request.getPartitionname(); + CompactionType type = request.getType(); + String state = request.getState(); + String poolName = request.getPoolName(); + List<String> params = new ArrayList<>(); + if (dbName != null && !dbName.isEmpty()) { + params.add(dbName); + } + if (tableName != null && !tableName.isEmpty()) { + params.add(tableName); + } + if (partName != null && !partName.isEmpty()) { + params.add(partName); + } + if (state != null && !state.isEmpty()) { + params.add(state); + } + if (type != null) { + params.add(thriftCompactionType2DbType(type).toString()); + } + if (org.apache.commons.lang3.StringUtils.isNotBlank(poolName)) { + params.add(poolName); + } + return params; + } + + private String getFilterClause(ShowCompactRequest request) { + StringBuilder filter = new StringBuilder(); + String dbName = request.getDbname(); + String tableName = request.getTablename(); + String partName = request.getPartitionname(); + CompactionType type = request.getType(); + String state = request.getState(); + String poolName = request.getPoolName(); + if (dbName != null && !dbName.isEmpty()) { Review Comment: Same as above, Stringutils.isNotBlank() ? ########## ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java: ########## @@ -5875,6 +5802,15 @@ public ShowCompactResponse showCompactions(String poolName) throws HiveException } } + public ShowCompactResponse showCompactions(ShowCompactRequest request) throws HiveException { Review Comment: ShowCompactResponse showCompactions(String poolName) is no longer used, it can be removed ########## ql/src/java/org/apache/hadoop/hive/ql/ddl/process/show/compactions/ShowCompactionsOperation.java: ########## @@ -68,6 +83,71 @@ public int execute() throws HiveException { return 0; } + private ShowCompactRequest getShowCompactioRequest(ShowCompactionsDesc desc) throws MetaException, SemanticException { + ShowCompactRequest request = new ShowCompactRequest(); + if (desc.getDbName() == null && desc.getTbName() != null) { Review Comment: StringUtils.isNotBlank()? ########## ql/src/java/org/apache/hadoop/hive/ql/ddl/process/show/compactions/ShowCompactionsOperation.java: ########## @@ -68,6 +83,71 @@ public int execute() throws HiveException { return 0; } + private ShowCompactRequest getShowCompactioRequest(ShowCompactionsDesc desc) throws MetaException, SemanticException { + ShowCompactRequest request = new ShowCompactRequest(); + if (desc.getDbName() == null && desc.getTbName() != null) { + request.setDbname(SessionState.get().getCurrentDatabase()); + } else { + request.setDbname(desc.getDbName()); + } + if (desc.getTbName() != null && !desc.getTbName().isEmpty()) + request.setTablename(desc.getTbName()); + if (desc.getPoolName() != null && !desc.getPoolName().isEmpty()) + request.setPoolName(desc.getPoolName()); + if (desc.getCompactionType() != null && !desc.getCompactionType().isEmpty()) + request.setType(inputCompactionType2DBType(desc.getCompactionType())); + if (desc.getCompactionStatus() != null && !desc.getCompactionStatus().isEmpty()) + request.setState(compactorStateFromInput(desc.getCompactionStatus())); + if (desc.getPartSpec() != null && !desc.getPartSpec().isEmpty()) + request.setPartitionname(getPartitionName(desc.getPartSpec())); + return request; + } + + private String getPartitionName(Map<String, String> partitionSpec) throws SemanticException { + String partitionName = null; + if (partitionSpec != null) { + try { + partitionName = Warehouse.makePartName(partitionSpec, false); + } catch (MetaException e) { + throw new SemanticException("partition " + partitionSpec.toString() + " not found"); + } + } + return partitionName; + } + + static CompactionType inputCompactionType2DBType(String inputValue) throws MetaException { + switch (inputValue.toUpperCase()) { + case MAJOR_TYPE: + return CompactionType.MAJOR; + case MINOR_TYPE: + return CompactionType.MINOR; + default: + throw new MetaException("Unexpected compaction type " + inputValue); + } + } + + private String compactorStateFromInput(String s) throws MetaException { Review Comment: Same as above, similar to org.apache.hadoop.hive.metastore.txn.TxnHandler.compactorStateToResponse ########## ql/src/java/org/apache/hadoop/hive/ql/ddl/process/show/compactions/ShowCompactionsAnalyzer.java: ########## @@ -40,20 +42,41 @@ public ShowCompactionsAnalyzer(QueryState queryState) throws SemanticException { @Override public void analyzeInternal(ASTNode root) throws SemanticException { + ctx.setResFile(ctx.getLocalTmpPath()); String poolName = null; - Tree pool = root.getChild(0); - if (pool != null) { - if (pool.getType() != HiveParser.TOK_COMPACT_POOL) { - throw new SemanticException("Unknown token, 'POOL' expected."); - } else { - poolName = unescapeSQLString(pool.getChild(0).getText()); + String dbName = null; + String tbName = null; + String compactionType = null; + String compactionStatus = null; + Map<String, String> partitionSpec = null; + if (root.getChildCount() > 6) { + throw new SemanticException(ErrorMsg.INVALID_AST_TREE.getMsg(root.toStringTree())); + } + if (root.getType() == HiveParser.TOK_SHOW_COMPACTIONS && root.getChildCount() >= 1) { + for (int i = 0; i < root.getChildCount(); i++) { + ASTNode child = (ASTNode) root.getChild(i); + if (child.getType() == HiveParser.TOK_TABTYPE) { + tbName = child.getChild(0).getText(); + // get partition metadata if partition specified + if (child.getChildCount() == 2) { + dbName = DDLUtils.getFQName((ASTNode) child.getChild(0).getChild(0)); Review Comment: no check for child.getChild(0).getChildCount(). Is this safe? ########## standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java: ########## @@ -3970,6 +3960,78 @@ public ShowCompactResponse showCompact(ShowCompactRequest rqst) throws MetaExcep } } + private List<String> getShowCompactionQueryParamList(ShowCompactRequest request) throws MetaException { + String dbName = request.getDbname(); + String tableName = request.getTablename(); + String partName = request.getPartitionname(); + CompactionType type = request.getType(); + String state = request.getState(); + String poolName = request.getPoolName(); + List<String> params = new ArrayList<>(); + if (dbName != null && !dbName.isEmpty()) { + params.add(dbName); + } + if (tableName != null && !tableName.isEmpty()) { Review Comment: Why not use Stringutils.isNotBlank()? ########## ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java: ########## @@ -105,10 +105,7 @@ import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.api.CompactionRequest; -import org.apache.hadoop.hive.metastore.api.GetPartitionsByNamesRequest; -import org.apache.hadoop.hive.metastore.api.GetTableRequest; -import org.apache.hadoop.hive.metastore.api.UpdateTransactionalStatsRequest; +import org.apache.hadoop.hive.metastore.api.*; Review Comment: Please restore the original imports, it can cause issues when backporting. ########## ql/src/java/org/apache/hadoop/hive/ql/ddl/process/show/compactions/ShowCompactionsOperation.java: ########## @@ -39,16 +43,27 @@ * Operation process of showing compactions. */ public class ShowCompactionsOperation extends DDLOperation<ShowCompactionsDesc> { + + static final protected String MAJOR_TYPE = "MAJOR"; + static final protected String MINOR_TYPE = "MINOR"; + static final char INITIATED_STATE = 'i'; Review Comment: Could these be moved to a common place where both TxnHandler and this class can access them? Like TxnStore? ########## parser/src/test/org/apache/hadoop/hive/ql/parse/TestParseShowCompactions.java: ########## @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.parse; + +import org.junit.Test; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.Is.is; + +public class TestParseShowCompactions { + ParseDriver parseDriver = new ParseDriver(); + + @Test + public void testShowCompactions() throws Exception { + ASTNode tree = parseDriver.parse( + "SHOW COMPACTIONS", null).getTree(); + + assertThat(tree.toStringTree(), is("tok_show_compactions <eof>")); + } + + @Test + public void testShowCompactionsFilterDb() throws Exception { + ASTNode tree = parseDriver.parse( + "SHOW COMPACTIONS DATABASE db1", null).getTree(); + + assertThat(tree.toStringTree(), is("(tok_show_compactions db1) <eof>")); + } + + private static final String EXPECTED_WHEN_FILTER_BY_DB_AND_ALL_AND_ORDER_BY = "\n" + + "nil\n" + + " TOK_SHOW_COMPACTIONS\n" + + " db1\n" + + " TOK_COMPACT_POOL\n" + + " 'pool0'\n" + + " TOK_COMPACTION_TYPE\n" + + " 'minor'\n" + + " TOK_COMPACTION_STATUS\n" + + " 'ready for clean'\n" + + " TOK_ORDERBY\n" + + " TOK_TABSORTCOLNAMEDESC\n" + + " TOK_NULLS_FIRST\n" + + " TOK_TABLE_OR_COL\n" + + " cq_table\n" + + " TOK_TABSORTCOLNAMEASC\n" + + " TOK_NULLS_LAST\n" + + " TOK_TABLE_OR_COL\n" + + " cq_state\n" + + " TOK_LIMIT\n" + + " 42\n" + + " <EOF>\n"; + + @Test + public void testShowCompactionsFilterDbAndAllAndOrder() throws Exception { + ASTNode tree = parseDriver.parse( + "SHOW COMPACTIONS DATABASE db1 POOL 'pool0' TYPE 'minor' STATUS 'ready for clean' ORDER BY cq_table DESC, cq_state LIMIT 42", null).getTree(); Review Comment: I can see no signs of ORDER BY and LIMIT support for SHOW COMPACTIONS in the code. ########## ql/src/java/org/apache/hadoop/hive/ql/ddl/process/show/compactions/ShowCompactionsOperation.java: ########## @@ -68,6 +83,71 @@ public int execute() throws HiveException { return 0; } + private ShowCompactRequest getShowCompactioRequest(ShowCompactionsDesc desc) throws MetaException, SemanticException { + ShowCompactRequest request = new ShowCompactRequest(); + if (desc.getDbName() == null && desc.getTbName() != null) { + request.setDbname(SessionState.get().getCurrentDatabase()); + } else { + request.setDbname(desc.getDbName()); + } + if (desc.getTbName() != null && !desc.getTbName().isEmpty()) + request.setTablename(desc.getTbName()); + if (desc.getPoolName() != null && !desc.getPoolName().isEmpty()) + request.setPoolName(desc.getPoolName()); + if (desc.getCompactionType() != null && !desc.getCompactionType().isEmpty()) + request.setType(inputCompactionType2DBType(desc.getCompactionType())); + if (desc.getCompactionStatus() != null && !desc.getCompactionStatus().isEmpty()) + request.setState(compactorStateFromInput(desc.getCompactionStatus())); + if (desc.getPartSpec() != null && !desc.getPartSpec().isEmpty()) + request.setPartitionname(getPartitionName(desc.getPartSpec())); + return request; + } + + private String getPartitionName(Map<String, String> partitionSpec) throws SemanticException { + String partitionName = null; + if (partitionSpec != null) { + try { + partitionName = Warehouse.makePartName(partitionSpec, false); + } catch (MetaException e) { + throw new SemanticException("partition " + partitionSpec.toString() + " not found"); + } + } + return partitionName; + } + + static CompactionType inputCompactionType2DBType(String inputValue) throws MetaException { Review Comment: This is very similar to org.apache.hadoop.hive.metastore.txn.TxnHandler.dbCompactionType2ThriftType and org.apache.hadoop.hive.metastore.txn.TxnHandler.thriftCompactionType2DbType. Could these methods collected to a common place like some utility class? ########## ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java: ########## @@ -2117,4 +2121,168 @@ public void testIsRawFormatFile() throws Exception { List<String> res = runStatementOnDriver("select * from file_formats"); Assert.assertEquals(3, res.size()); } + @Test + public void testShowCompactions() throws Exception { + d.destroy(); + hiveConf.setVar(HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict"); + d = new Driver(hiveConf); + //generate some compaction history + runStatementOnDriver("drop database if exists mydb1 cascade"); + runStatementOnDriver("create database mydb1"); + runStatementOnDriver("create table mydb1.tbl0 " + "(a int, b int) partitioned by (p string) clustered by (a) into " + + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); + runStatementOnDriver("insert into mydb1.tbl0" + " PARTITION(p) " + + " values(1,2,'p1'),(3,4,'p1'),(1,2,'p2'),(3,4,'p2'),(1,2,'p3'),(3,4,'p3')"); + runStatementOnDriver("alter table mydb1.tbl0" + " PARTITION(p='p1') compact 'MAJOR'"); + TestTxnCommands2.runWorker(hiveConf); + runStatementOnDriver("alter table mydb1.tbl0" + " PARTITION(p='p2') compact 'MAJOR'"); + TestTxnCommands2.runWorker(hiveConf); + runStatementOnDriver("alter table mydb1.tbl0" + " PARTITION(p='p3') compact 'MAJOR'"); + TestTxnCommands2.runWorker(hiveConf); + runStatementOnDriver("insert into mydb1.tbl0" + " PARTITION(p) " + + " values(4,5,'p1'),(6,7,'p1'),(4,5,'p2'),(6,7,'p2'),(4,5,'p3'),(6,7,'p3')"); + runStatementOnDriver("alter table mydb1.tbl0" + " PARTITION (p='p1') compact 'MAJOR'"); + TestTxnCommands2.runWorker(hiveConf); + runStatementOnDriver("alter table mydb1.tbl0" + " PARTITION (p='p2') compact 'MAJOR'"); + TestTxnCommands2.runWorker(hiveConf); + runStatementOnDriver("alter table mydb1.tbl0" + " PARTITION (p='p3') compact 'MAJOR' pool 'pool0'"); + TestTxnCommands2.runWorker(hiveConf); + TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf); + + SessionState.get().setCurrentDatabase("mydb1"); + + //testing show compaction command + ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); + List<String> r = runStatementOnDriver("SHOW COMPACTIONS"); + Assert.assertEquals(rsp.getCompacts().size()+1, r.size());//includes Header row + + + r = runStatementOnDriver("SHOW COMPACTIONS SCHEMA mydb1 STATUS 'ready for cleaning'"); + Assert.assertEquals(rsp.getCompacts().stream().filter(x->x.getState().equals("ready for cleaning")).count() +1, Review Comment: You are checking the same untouched rsp variable every time after running different commands on the driver. Didn't you want to check the driver command result instead? This also applies to all the blocks below ########## ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java: ########## @@ -3359,7 +3359,58 @@ private void runStatementOnDriverWithAbort(String stmt) { } catch (CommandProcessorException e) { } } + @Test + public void testShowCompactWithFilterOption() throws Exception { Review Comment: Please also add some tests where there are multiple independent filters set up (like pool and partition name) to check if the filtering works well. You could also check the value in the result if that really matches the filtering criteria. Like if you filter for table = bar1, check it in the result. ########## ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java: ########## @@ -2117,4 +2121,168 @@ public void testIsRawFormatFile() throws Exception { List<String> res = runStatementOnDriver("select * from file_formats"); Assert.assertEquals(3, res.size()); } + @Test + public void testShowCompactions() throws Exception { + d.destroy(); + hiveConf.setVar(HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict"); + d = new Driver(hiveConf); + //generate some compaction history + runStatementOnDriver("drop database if exists mydb1 cascade"); + runStatementOnDriver("create database mydb1"); + runStatementOnDriver("create table mydb1.tbl0 " + "(a int, b int) partitioned by (p string) clustered by (a) into " + + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); + runStatementOnDriver("insert into mydb1.tbl0" + " PARTITION(p) " + + " values(1,2,'p1'),(3,4,'p1'),(1,2,'p2'),(3,4,'p2'),(1,2,'p3'),(3,4,'p3')"); + runStatementOnDriver("alter table mydb1.tbl0" + " PARTITION(p='p1') compact 'MAJOR'"); + TestTxnCommands2.runWorker(hiveConf); + runStatementOnDriver("alter table mydb1.tbl0" + " PARTITION(p='p2') compact 'MAJOR'"); + TestTxnCommands2.runWorker(hiveConf); + runStatementOnDriver("alter table mydb1.tbl0" + " PARTITION(p='p3') compact 'MAJOR'"); + TestTxnCommands2.runWorker(hiveConf); + runStatementOnDriver("insert into mydb1.tbl0" + " PARTITION(p) " + + " values(4,5,'p1'),(6,7,'p1'),(4,5,'p2'),(6,7,'p2'),(4,5,'p3'),(6,7,'p3')"); + runStatementOnDriver("alter table mydb1.tbl0" + " PARTITION (p='p1') compact 'MAJOR'"); + TestTxnCommands2.runWorker(hiveConf); + runStatementOnDriver("alter table mydb1.tbl0" + " PARTITION (p='p2') compact 'MAJOR'"); + TestTxnCommands2.runWorker(hiveConf); + runStatementOnDriver("alter table mydb1.tbl0" + " PARTITION (p='p3') compact 'MAJOR' pool 'pool0'"); + TestTxnCommands2.runWorker(hiveConf); + TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf); + + SessionState.get().setCurrentDatabase("mydb1"); + + //testing show compaction command + ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); + List<String> r = runStatementOnDriver("SHOW COMPACTIONS"); + Assert.assertEquals(rsp.getCompacts().size()+1, r.size());//includes Header row + + + r = runStatementOnDriver("SHOW COMPACTIONS SCHEMA mydb1 STATUS 'ready for cleaning'"); + Assert.assertEquals(rsp.getCompacts().stream().filter(x->x.getState().equals("ready for cleaning")).count() +1, + r.size());//includes Header row + + r = runStatementOnDriver("SHOW COMPACTIONS SCHEMA mydb1 TYPE 'MAJOR' "); + Assert.assertEquals(rsp.getCompacts().stream().filter(x->x.getDbname().equals("mydb1")). + filter(x->x.getType().equals(CompactionType.MAJOR)).count()+1, r.size());//includes Header row + + r = runStatementOnDriver("SHOW COMPACTIONS SCHEMA mydb1 POOL 'poolx' TYPE 'MINOR' "); + //includes Header row + Assert.assertEquals(rsp.getCompacts().stream().filter(x->x.getDbname().equals("mydb1")). + filter(x->x.getPoolName().equals("poolx")).filter(x->x.getType().equals(CompactionType.MAJOR)).count()+1, r.size()); + + r = runStatementOnDriver("SHOW COMPACTIONS SCHEMA mydb1 POOL 'pool0' TYPE 'MAJOR'"); + Assert.assertEquals(2, r.size());//includes Header row + + r = runStatementOnDriver("SHOW COMPACTIONS SCHEMA mydb1 POOL 'pool0'"); + Assert.assertEquals(rsp.getCompacts().stream().filter(x->x.getDbname().equals("mydb1")). + filter(x->x.getPoolName().equals("pool0")).count()+1, r.size());//includes Header row + + r = runStatementOnDriver("SHOW COMPACTIONS DATABASE mydb1 POOL 'pool0'"); + Assert.assertEquals(2, r.size());//includes Header row + + r = runStatementOnDriver("SHOW COMPACTIONS tbl0 TYPE 'MAJOR' "); Review Comment: Since every other filter criteria is named (like SCHEMA, DATABASE, POOL, etc), I think this one should also looke like SHOW COMPACTIONS TABLE tbl0 TYPE 'MAJOR'. This is a bit odd this way. Issue Time Tracking ------------------- Worklog Id: (was: 814653) Time Spent: 1h 10m (was: 1h) > SHOW COMPACTIONS should support filtering options > ------------------------------------------------- > > Key: HIVE-13353 > URL: https://issues.apache.org/jira/browse/HIVE-13353 > Project: Hive > Issue Type: Improvement > Components: Transactions > Affects Versions: 1.3.0, 2.0.0 > Reporter: Eugene Koifman > Assignee: KIRTI RUGE > Priority: Major > Labels: pull-request-available > Attachments: HIVE-13353.01.patch > > Time Spent: 1h 10m > Remaining Estimate: 0h > > Since we now have historical information in SHOW COMPACTIONS the output can > easily become unwieldy. (e.g. 1000 partitions with 3 lines of history each) > this is a significant usability issue > Need to add ability to filter by db/table/partition > Perhaps would also be useful to filter by status -- This message was sent by Atlassian Jira (v8.20.10#820010)