This is an automated email from the ASF dual-hosted git repository.

pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 2bdf0cc  HIVE-25773: Column descriptors might not be deleted via 
direct sql (Yu-Wen Lai reviewed by Peter Vary) (#2843)
2bdf0cc is described below

commit 2bdf0ccb94f9555dd0c06131a7fb5defcf8010ed
Author: hsnusonic <hsnuso...@users.noreply.github.com>
AuthorDate: Fri Dec 10 06:16:04 2021 -0800

    HIVE-25773: Column descriptors might not be deleted via direct sql (Yu-Wen 
Lai reviewed by Peter Vary) (#2843)
---
 .../hadoop/hive/metastore/MetaStoreDirectSql.java  | 27 +++++++--------
 .../hadoop/hive/metastore/TestObjectStore.java     | 38 ++++++++++++++++++++--
 2 files changed, 49 insertions(+), 16 deletions(-)

diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index d28e630..b200608 100644
--- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -29,12 +29,15 @@ import java.sql.Statement;
 import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.TreeMap;
 import java.util.stream.Collectors;
 
@@ -151,7 +154,7 @@ class MetaStoreDirectSql {
    * @return The concatenated list
    * @throws MetaException If the list contains wrong data
    */
-  public static <T> String getIdListForIn(List<T> objectIds) throws 
MetaException {
+  public static <T> String getIdListForIn(Collection<T> objectIds) throws 
MetaException {
     return objectIds.stream()
                .map(i -> i.toString())
                .collect(Collectors.joining(","));
@@ -2622,7 +2625,7 @@ class MetaStoreDirectSql {
             + "WHERE " + PARTITIONS + ".\"PART_ID\" in (" + partitionIds + ")";
 
     List<Object> sdIdList = new ArrayList<>(partitionIdList.size());
-    List<Object> columnDescriptorIdList = new ArrayList<>(1);
+    List<Long> columnDescriptorIdList = new ArrayList<>(1);
     List<Object> serdeIdList = new ArrayList<>(partitionIdList.size());
     try (QueryWrapper query = new 
QueryWrapper(pm.newQuery("javax.jdo.query.SQL", queryText))) {
       List<Object[]> sqlResult = MetastoreDirectSqlUtils
@@ -2808,7 +2811,7 @@ class MetaStoreDirectSql {
    * @throws MetaException If there is an SQL exception during the execution 
it converted to
    * MetaException
    */
-  private void dropDanglingColumnDescriptors(List<Object> 
columnDescriptorIdList)
+  private void dropDanglingColumnDescriptors(List<Long> columnDescriptorIdList)
       throws MetaException {
     if (columnDescriptorIdList.isEmpty()) {
       return;
@@ -2818,26 +2821,24 @@ class MetaStoreDirectSql {
 
     // Drop column descriptor, if no relation left
     queryText =
-        "SELECT " + SDS + ".\"CD_ID\", count(1) "
+        "SELECT " + SDS + ".\"CD_ID\" "
             + "from " + SDS + " "
             + "WHERE " + SDS + ".\"CD_ID\" in (" + colIds + ") "
             + "GROUP BY " + SDS + ".\"CD_ID\"";
-    List<Object> danglingColumnDescriptorIdList = new 
ArrayList<>(columnDescriptorIdList.size());
+    Set<Long> danglingColumnDescriptorIdSet = new 
HashSet<>(columnDescriptorIdList);
     try (QueryWrapper query = new 
QueryWrapper(pm.newQuery("javax.jdo.query.SQL", queryText))) {
-      List<Object[]> sqlResult = MetastoreDirectSqlUtils
-          .ensureList(executeWithArray(query, null, queryText));
+      List<Long> sqlResult = executeWithArray(query, null, queryText);
 
       if (!sqlResult.isEmpty()) {
-        for (Object[] fields : sqlResult) {
-          if (MetastoreDirectSqlUtils.extractSqlInt(fields[1]) == 0) {
-            
danglingColumnDescriptorIdList.add(MetastoreDirectSqlUtils.extractSqlLong(fields[0]));
-          }
+        for (Long cdId : sqlResult) {
+          // the returned CD is not dangling, so remove it from the list
+          danglingColumnDescriptorIdSet.remove(cdId);
         }
       }
     }
-    if (!danglingColumnDescriptorIdList.isEmpty()) {
+    if (!danglingColumnDescriptorIdSet.isEmpty()) {
       try {
-        String danglingCDIds = getIdListForIn(danglingColumnDescriptorIdList);
+        String danglingCDIds = getIdListForIn(danglingColumnDescriptorIdSet);
 
         // Drop the columns_v2
         queryText = "delete from " + COLUMNS_V2 + " where \"CD_ID\" in (" + 
danglingCDIds + ")";
diff --git 
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java
 
b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java
index 37ff22c..4de31fe 100644
--- 
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java
+++ 
b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java
@@ -563,7 +563,7 @@ public class TestObjectStore {
    * Checks if the directSQL partition drop removes every connected data from 
the RDBMS tables.
    */
   @Test
-  public void testDirectSQLDropParitionsCleanup() throws Exception {
+  public void testDirectSQLDropPartitionsCleanup() throws Exception {
 
     createPartitionedTable(true, true);
 
@@ -583,8 +583,8 @@ public class TestObjectStore {
     checkBackendTableSize("SERDES", 4); // Table has a serde
 
     // drop the partitions
-    try(AutoCloseable c =deadline()) {
-           objectStore.dropPartitionsInternal(DEFAULT_CATALOG_NAME, DB1, 
TABLE1,
+    try (AutoCloseable c = deadline()) {
+      objectStore.dropPartitionsInternal(DEFAULT_CATALOG_NAME, DB1, TABLE1,
                Arrays.asList("test_part_col=a0", "test_part_col=a1", 
"test_part_col=a2"), true, false);
     }
 
@@ -605,6 +605,38 @@ public class TestObjectStore {
   }
 
   @Test
+  public void testDirectSQLCDsCleanup() throws Exception {
+    createPartitionedTable(true, true);
+    // Checks there is only one CD before altering partition
+    checkBackendTableSize("PARTITIONS", 3);
+    checkBackendTableSize("CDS", 1);
+    checkBackendTableSize("COLUMNS_V2", 5);
+    // Alters a partition to create a new column descriptor
+    List<String> partVals = Arrays.asList("a0");
+    try (AutoCloseable c = deadline()) {
+      Partition part = objectStore.getPartition(DEFAULT_CATALOG_NAME, DB1, 
TABLE1, partVals);
+      StorageDescriptor newSd = part.getSd().deepCopy();
+      newSd.addToCols(new FieldSchema("test_add_col", "int", null));
+      Partition newPart = part.deepCopy();
+      newPart.setSd(newSd);
+      objectStore.alterPartition(DEFAULT_CATALOG_NAME, DB1, TABLE1, partVals, 
newPart, null);
+    }
+    // Checks now there is one more column descriptor
+    checkBackendTableSize("PARTITIONS", 3);
+    checkBackendTableSize("CDS", 2);
+    checkBackendTableSize("COLUMNS_V2", 11);
+    // drop the partitions
+    try (AutoCloseable c = deadline()) {
+      objectStore.dropPartitionsInternal(DEFAULT_CATALOG_NAME, DB1, TABLE1,
+          Arrays.asList("test_part_col=a0", "test_part_col=a1", 
"test_part_col=a2"), true, false);
+    }
+    // Checks if the data connected to the partitions is dropped
+    checkBackendTableSize("PARTITIONS", 0);
+    checkBackendTableSize("CDS", 1); // Table has a CD
+    checkBackendTableSize("COLUMNS_V2", 5);
+  }
+
+  @Test
   public void testGetPartitionStatistics() throws Exception {
     createPartitionedTable(true, true);
 

Reply via email to