This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 989a433abb [Hive] Avoid excessive HMS memory usage when executing 
AlterTable for a Paimon table containing a large number of fields (#4549)
989a433abb is described below

commit 989a433abbf99a3b8ced274b10a95dea8937db77
Author: Gang Yang <[email protected]>
AuthorDate: Sun Nov 24 23:24:06 2024 +0800

    [Hive] Avoid excessive HMS memory usage when executing AlterTable for a 
Paimon table containing a large number of fields (#4549)
---
 .../java/org/apache/paimon/hive/HiveCatalog.java   | 21 ++++-
 .../org/apache/paimon/hive/HiveTableStatsTest.java | 94 ++++++++++++++++++++++
 .../paimon/hive/AlterFailHiveMetaStoreClient.java  |  8 ++
 .../paimon/hive/AlterFailHiveMetaStoreClient.java  |  8 ++
 4 files changed, 128 insertions(+), 3 deletions(-)

diff --git 
a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/HiveCatalog.java
 
b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/HiveCatalog.java
index e936587320..0f2fb6fa9d 100644
--- 
a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/HiveCatalog.java
+++ 
b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/HiveCatalog.java
@@ -56,10 +56,12 @@ import org.apache.paimon.view.ViewImpl;
 import org.apache.flink.table.hive.LegacyHiveClasses;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
@@ -85,6 +87,7 @@ import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
 import java.util.function.Function;
@@ -875,11 +878,23 @@ public class HiveCatalog extends AbstractCatalog {
         updateHmsTable(table, identifier, newSchema, 
newSchema.options().get("provider"), location);
         clients.execute(
                 client ->
-                        client.alter_table(
+                        client.alter_table_with_environmentContext(
                                 identifier.getDatabaseName(),
                                 identifier.getTableName(),
                                 table,
-                                true));
+                                createHiveEnvironmentContext()));
+    }
+
+    private EnvironmentContext createHiveEnvironmentContext() {
+        EnvironmentContext environmentContext = new EnvironmentContext();
+        environmentContext.putToProperties(StatsSetupConst.CASCADE, "true");
+        if (Objects.isNull(options)) {
+            return environmentContext;
+        }
+        environmentContext.putToProperties(
+                StatsSetupConst.DO_NOT_UPDATE_STATS,
+                options.getString(StatsSetupConst.DO_NOT_UPDATE_STATS, 
"false"));
+        return environmentContext;
     }
 
     @Override
@@ -1001,7 +1016,7 @@ public class HiveCatalog extends AbstractCatalog {
         return warehouse;
     }
 
-    private Table getHmsTable(Identifier identifier) throws 
TableNotExistException {
+    public Table getHmsTable(Identifier identifier) throws 
TableNotExistException {
         try {
             return clients.run(
                     client ->
diff --git 
a/paimon-hive/paimon-hive-catalog/src/test/java/org/apache/paimon/hive/HiveTableStatsTest.java
 
b/paimon-hive/paimon-hive-catalog/src/test/java/org/apache/paimon/hive/HiveTableStatsTest.java
new file mode 100644
index 0000000000..33016fd083
--- /dev/null
+++ 
b/paimon-hive/paimon-hive-catalog/src/test/java/org/apache/paimon/hive/HiveTableStatsTest.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.hive;
+
+import org.apache.paimon.catalog.Catalog;
+import org.apache.paimon.catalog.CatalogContext;
+import org.apache.paimon.catalog.Identifier;
+import org.apache.paimon.fs.FileIO;
+import org.apache.paimon.fs.Path;
+import org.apache.paimon.options.CatalogOptions;
+import org.apache.paimon.options.Options;
+import org.apache.paimon.schema.Schema;
+import org.apache.paimon.schema.SchemaChange;
+import org.apache.paimon.types.DataField;
+import org.apache.paimon.types.DataTypes;
+
+import org.apache.paimon.shade.guava30.com.google.common.collect.Lists;
+import org.apache.paimon.shade.guava30.com.google.common.collect.Maps;
+
+import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import java.util.Collections;
+import java.util.UUID;
+
+import static 
org.apache.hadoop.hive.conf.HiveConf.ConfVars.METASTORECONNECTURLKEY;
+import static org.assertj.core.api.Assertions.assertThat;
+
+/** Verify that table stats are not updated when altering a Paimon table via HMS. */
+public class HiveTableStatsTest {
+    @TempDir java.nio.file.Path tempFile;
+    protected Catalog catalog;
+
+    @BeforeEach
+    public void setUp() throws Exception {
+        String warehouse = tempFile.toUri().toString();
+        HiveConf hiveConf = new HiveConf();
+        String jdoConnectionURL = "jdbc:derby:memory:" + UUID.randomUUID();
+        hiveConf.setVar(METASTORECONNECTURLKEY, jdoConnectionURL + 
";create=true");
+        String metastoreClientClass = 
"org.apache.hadoop.hive.metastore.HiveMetaStoreClient";
+        Options catalogOptions = new Options();
+        catalogOptions.set(StatsSetupConst.DO_NOT_UPDATE_STATS, "true");
+        catalogOptions.set(CatalogOptions.WAREHOUSE, warehouse);
+        CatalogContext catalogContext = CatalogContext.create(catalogOptions);
+        FileIO fileIO = FileIO.get(new Path(warehouse), catalogContext);
+        catalog =
+                new HiveCatalog(fileIO, hiveConf, metastoreClientClass, 
catalogOptions, warehouse);
+    }
+
+    @Test
+    public void testAlterTable() throws Exception {
+        catalog.createDatabase("test_db", false);
+        // Alter table adds a new column to an existing table, but does not
+        // update stats
+        Identifier identifier = Identifier.create("test_db", "test_table");
+        catalog.createTable(
+                identifier,
+                new Schema(
+                        Lists.newArrayList(new DataField(0, "col1", 
DataTypes.STRING())),
+                        Collections.emptyList(),
+                        Collections.emptyList(),
+                        Maps.newHashMap(),
+                        ""),
+                false);
+        catalog.alterTable(
+                identifier,
+                Lists.newArrayList(
+                        SchemaChange.addColumn("col2", DataTypes.DATE()),
+                        SchemaChange.addColumn("col3", DataTypes.STRING(), 
"col3 field")),
+                false);
+        HiveCatalog hiveCatalog = (HiveCatalog) catalog;
+        Table table = hiveCatalog.getHmsTable(identifier);
+        
assertThat(table.getParameters().get("COLUMN_STATS_ACCURATE")).isEqualTo(null);
+    }
+}
diff --git 
a/paimon-hive/paimon-hive-connector-2.3/src/test/java/org/apache/paimon/hive/AlterFailHiveMetaStoreClient.java
 
b/paimon-hive/paimon-hive-connector-2.3/src/test/java/org/apache/paimon/hive/AlterFailHiveMetaStoreClient.java
index ebd4684edf..55e6d74084 100644
--- 
a/paimon-hive/paimon-hive-connector-2.3/src/test/java/org/apache/paimon/hive/AlterFailHiveMetaStoreClient.java
+++ 
b/paimon-hive/paimon-hive-connector-2.3/src/test/java/org/apache/paimon/hive/AlterFailHiveMetaStoreClient.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.HiveMetaHookLoader;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
 import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Table;
@@ -51,4 +52,11 @@ public class AlterFailHiveMetaStoreClient extends 
HiveMetaStoreClient implements
             throws InvalidOperationException, MetaException, TException {
         throw new TException();
     }
+
+    @Override
+    public void alter_table_with_environmentContext(
+            String defaultDatabaseName, String tblName, Table table, 
EnvironmentContext env)
+            throws InvalidOperationException, MetaException, TException {
+        throw new TException();
+    }
 }
diff --git 
a/paimon-hive/paimon-hive-connector-3.1/src/test/java/org/apache/paimon/hive/AlterFailHiveMetaStoreClient.java
 
b/paimon-hive/paimon-hive-connector-3.1/src/test/java/org/apache/paimon/hive/AlterFailHiveMetaStoreClient.java
index ae6a1bb85a..eab18feada 100644
--- 
a/paimon-hive/paimon-hive-connector-3.1/src/test/java/org/apache/paimon/hive/AlterFailHiveMetaStoreClient.java
+++ 
b/paimon-hive/paimon-hive-connector-3.1/src/test/java/org/apache/paimon/hive/AlterFailHiveMetaStoreClient.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.metastore.HiveMetaHookLoader;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
 import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Table;
@@ -51,4 +52,11 @@ public class AlterFailHiveMetaStoreClient extends 
HiveMetaStoreClient implements
             throws InvalidOperationException, MetaException, TException {
         throw new TException();
     }
+
+    @Override
+    public void alter_table_with_environmentContext(
+            String defaultDatabaseName, String tblName, Table table, 
EnvironmentContext env)
+            throws InvalidOperationException, MetaException, TException {
+        throw new TException();
+    }
 }

Reply via email to