This is an automated email from the ASF dual-hosted git repository.

pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

The following commit(s) were added to refs/heads/master by this push:
     new 2bdf0cc  HIVE-25773: Column descriptors might not be deleted via direct sql (Yu-Wen Lai reviewed by Peter Vary) (#2843)
2bdf0cc is described below

commit 2bdf0ccb94f9555dd0c06131a7fb5defcf8010ed
Author: hsnusonic <hsnuso...@users.noreply.github.com>
AuthorDate: Fri Dec 10 06:16:04 2021 -0800

    HIVE-25773: Column descriptors might not be deleted via direct sql (Yu-Wen Lai reviewed by Peter Vary) (#2843)
---
 .../hadoop/hive/metastore/MetaStoreDirectSql.java | 27 +++++++--------
 .../hadoop/hive/metastore/TestObjectStore.java | 38 ++++++++++++++++++++--
 2 files changed, 49 insertions(+), 16 deletions(-)

diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index d28e630..b200608 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -29,12 +29,15 @@ import java.sql.Statement;
 import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.TreeMap;
 import java.util.stream.Collectors;
 
@@ -151,7 +154,7 @@ class MetaStoreDirectSql {
    * @return The concatenated list
    * @throws MetaException If the list contains wrong data
    */
-  public static <T> String getIdListForIn(List<T> objectIds) throws MetaException {
+  public static <T> String getIdListForIn(Collection<T> objectIds) throws MetaException {
     return objectIds.stream()
         .map(i -> i.toString())
         .collect(Collectors.joining(","));
@@ -2622,7 +2625,7 @@ class MetaStoreDirectSql {
             + "WHERE " + PARTITIONS + ".\"PART_ID\" in (" + partitionIds + ")";
 
     List<Object> sdIdList = new ArrayList<>(partitionIdList.size());
-    List<Object> columnDescriptorIdList = new ArrayList<>(1);
+    List<Long> columnDescriptorIdList = new ArrayList<>(1);
     List<Object> serdeIdList = new ArrayList<>(partitionIdList.size());
     try (QueryWrapper query = new QueryWrapper(pm.newQuery("javax.jdo.query.SQL", queryText))) {
       List<Object[]> sqlResult = MetastoreDirectSqlUtils
@@ -2808,7 +2811,7 @@ class MetaStoreDirectSql {
    * @throws MetaException If there is an SQL exception during the execution it converted to
    * MetaException
    */
-  private void dropDanglingColumnDescriptors(List<Object> columnDescriptorIdList)
+  private void dropDanglingColumnDescriptors(List<Long> columnDescriptorIdList)
       throws MetaException {
     if (columnDescriptorIdList.isEmpty()) {
       return;
@@ -2818,26 +2821,24 @@ class MetaStoreDirectSql {
 
     // Drop column descriptor, if no relation left
     queryText =
-        "SELECT " + SDS + ".\"CD_ID\", count(1) "
+        "SELECT " + SDS + ".\"CD_ID\" "
             + "from " + SDS + " "
             + "WHERE " + SDS + ".\"CD_ID\" in (" + colIds + ") "
             + "GROUP BY " + SDS + ".\"CD_ID\"";
-    List<Object> danglingColumnDescriptorIdList = new ArrayList<>(columnDescriptorIdList.size());
+    Set<Long> danglingColumnDescriptorIdSet = new HashSet<>(columnDescriptorIdList);
     try (QueryWrapper query = new QueryWrapper(pm.newQuery("javax.jdo.query.SQL", queryText))) {
-      List<Object[]> sqlResult = MetastoreDirectSqlUtils
-          .ensureList(executeWithArray(query, null, queryText));
+      List<Long> sqlResult = executeWithArray(query, null, queryText);
 
       if (!sqlResult.isEmpty()) {
-        for (Object[] fields : sqlResult) {
-          if (MetastoreDirectSqlUtils.extractSqlInt(fields[1]) == 0) {
-            danglingColumnDescriptorIdList.add(MetastoreDirectSqlUtils.extractSqlLong(fields[0]));
-          }
+        for (Long cdId : sqlResult) {
+          // the returned CD is not dangling, so remove it from the list
+          danglingColumnDescriptorIdSet.remove(cdId);
         }
       }
     }
-    if (!danglingColumnDescriptorIdList.isEmpty()) {
+    if (!danglingColumnDescriptorIdSet.isEmpty()) {
       try {
-        String danglingCDIds = getIdListForIn(danglingColumnDescriptorIdList);
+        String danglingCDIds = getIdListForIn(danglingColumnDescriptorIdSet);
 
         // Drop the columns_v2
         queryText = "delete from " + COLUMNS_V2 + " where \"CD_ID\" in (" + danglingCDIds + ")";
diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java
index 37ff22c..4de31fe 100644
--- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java
+++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java
@@ -563,7 +563,7 @@ public class TestObjectStore {
    * Checks if the directSQL partition drop removes every connected data from the RDBMS tables.
    */
   @Test
-  public void testDirectSQLDropParitionsCleanup() throws Exception {
+  public void testDirectSQLDropPartitionsCleanup() throws Exception {
 
     createPartitionedTable(true, true);
 
@@ -583,8 +583,8 @@ public class TestObjectStore {
     checkBackendTableSize("SERDES", 4); // Table has a serde
 
     // drop the partitions
-    try(AutoCloseable c =deadline()) {
-        objectStore.dropPartitionsInternal(DEFAULT_CATALOG_NAME, DB1, TABLE1,
+    try (AutoCloseable c = deadline()) {
+      objectStore.dropPartitionsInternal(DEFAULT_CATALOG_NAME, DB1, TABLE1,
           Arrays.asList("test_part_col=a0", "test_part_col=a1", "test_part_col=a2"), true, false);
     }
 
@@ -605,6 +605,38 @@ public class TestObjectStore {
   }
 
   @Test
+  public void testDirectSQLCDsCleanup() throws Exception {
+    createPartitionedTable(true, true);
+    // Checks there is only one CD before altering partition
+    checkBackendTableSize("PARTITIONS", 3);
+    checkBackendTableSize("CDS", 1);
+    checkBackendTableSize("COLUMNS_V2", 5);
+    // Alters a partition to create a new column descriptor
+    List<String> partVals = Arrays.asList("a0");
+    try (AutoCloseable c = deadline()) {
+      Partition part = objectStore.getPartition(DEFAULT_CATALOG_NAME, DB1, TABLE1, partVals);
+      StorageDescriptor newSd = part.getSd().deepCopy();
+      newSd.addToCols(new FieldSchema("test_add_col", "int", null));
+      Partition newPart = part.deepCopy();
+      newPart.setSd(newSd);
+      objectStore.alterPartition(DEFAULT_CATALOG_NAME, DB1, TABLE1, partVals, newPart, null);
+    }
+    // Checks now there is one more column descriptor
+    checkBackendTableSize("PARTITIONS", 3);
+    checkBackendTableSize("CDS", 2);
+    checkBackendTableSize("COLUMNS_V2", 11);
+    // drop the partitions
+    try (AutoCloseable c = deadline()) {
+      objectStore.dropPartitionsInternal(DEFAULT_CATALOG_NAME, DB1, TABLE1,
+          Arrays.asList("test_part_col=a0", "test_part_col=a1", "test_part_col=a2"), true, false);
+    }
+    // Checks if the data connected to the partitions is dropped
+    checkBackendTableSize("PARTITIONS", 0);
+    checkBackendTableSize("CDS", 1); // Table has a CD
+    checkBackendTableSize("COLUMNS_V2", 5);
+  }
+
+  @Test
   public void testGetPartitionStatistics() throws Exception {
     createPartitionedTable(true, true);
 