This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 989a433abb [Hive] Avoid excessive HMS memory usage when executing
AlterTable for a Paimon table containing a large number of fields (#4549)
989a433abb is described below
commit 989a433abbf99a3b8ced274b10a95dea8937db77
Author: Gang Yang <[email protected]>
AuthorDate: Sun Nov 24 23:24:06 2024 +0800
[Hive] Avoid excessive HMS memory usage when executing AlterTable for a
Paimon table containing a large number of fields (#4549)
---
.../java/org/apache/paimon/hive/HiveCatalog.java | 21 ++++-
.../org/apache/paimon/hive/HiveTableStatsTest.java | 94 ++++++++++++++++++++++
.../paimon/hive/AlterFailHiveMetaStoreClient.java | 8 ++
.../paimon/hive/AlterFailHiveMetaStoreClient.java | 8 ++
4 files changed, 128 insertions(+), 3 deletions(-)
diff --git
a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/HiveCatalog.java
b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/HiveCatalog.java
index e936587320..0f2fb6fa9d 100644
---
a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/HiveCatalog.java
+++
b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/HiveCatalog.java
@@ -56,10 +56,12 @@ import org.apache.paimon.view.ViewImpl;
import org.apache.flink.table.hive.LegacyHiveClasses;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
@@ -85,6 +87,7 @@ import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
@@ -875,11 +878,23 @@ public class HiveCatalog extends AbstractCatalog {
updateHmsTable(table, identifier, newSchema,
newSchema.options().get("provider"), location);
clients.execute(
client ->
- client.alter_table(
+ client.alter_table_with_environmentContext(
identifier.getDatabaseName(),
identifier.getTableName(),
table,
- true));
+ createHiveEnvironmentContext()));
+ }
+
+ private EnvironmentContext createHiveEnvironmentContext() {
+ EnvironmentContext environmentContext = new EnvironmentContext();
+ environmentContext.putToProperties(StatsSetupConst.CASCADE, "true");
+ if (Objects.isNull(options)) {
+ return environmentContext;
+ }
+ environmentContext.putToProperties(
+ StatsSetupConst.DO_NOT_UPDATE_STATS,
+ options.getString(StatsSetupConst.DO_NOT_UPDATE_STATS,
"false"));
+ return environmentContext;
}
@Override
@@ -1001,7 +1016,7 @@ public class HiveCatalog extends AbstractCatalog {
return warehouse;
}
- private Table getHmsTable(Identifier identifier) throws
TableNotExistException {
+ public Table getHmsTable(Identifier identifier) throws
TableNotExistException {
try {
return clients.run(
client ->
diff --git
a/paimon-hive/paimon-hive-catalog/src/test/java/org/apache/paimon/hive/HiveTableStatsTest.java
b/paimon-hive/paimon-hive-catalog/src/test/java/org/apache/paimon/hive/HiveTableStatsTest.java
new file mode 100644
index 0000000000..33016fd083
--- /dev/null
+++
b/paimon-hive/paimon-hive-catalog/src/test/java/org/apache/paimon/hive/HiveTableStatsTest.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.hive;
+
+import org.apache.paimon.catalog.Catalog;
+import org.apache.paimon.catalog.CatalogContext;
+import org.apache.paimon.catalog.Identifier;
+import org.apache.paimon.fs.FileIO;
+import org.apache.paimon.fs.Path;
+import org.apache.paimon.options.CatalogOptions;
+import org.apache.paimon.options.Options;
+import org.apache.paimon.schema.Schema;
+import org.apache.paimon.schema.SchemaChange;
+import org.apache.paimon.types.DataField;
+import org.apache.paimon.types.DataTypes;
+
+import org.apache.paimon.shade.guava30.com.google.common.collect.Lists;
+import org.apache.paimon.shade.guava30.com.google.common.collect.Maps;
+
+import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import java.util.Collections;
+import java.util.UUID;
+
+import static
org.apache.hadoop.hive.conf.HiveConf.ConfVars.METASTORECONNECTURLKEY;
+import static org.assertj.core.api.Assertions.assertThat;
+
+/** Verify that table stats are not updated by alterTable when DO_NOT_UPDATE_STATS is set. */
+public class HiveTableStatsTest {
+ @TempDir java.nio.file.Path tempFile;
+ protected Catalog catalog;
+
+ @BeforeEach
+ public void setUp() throws Exception {
+ String warehouse = tempFile.toUri().toString();
+ HiveConf hiveConf = new HiveConf();
+ String jdoConnectionURL = "jdbc:derby:memory:" + UUID.randomUUID();
+ hiveConf.setVar(METASTORECONNECTURLKEY, jdoConnectionURL +
";create=true");
+ String metastoreClientClass =
"org.apache.hadoop.hive.metastore.HiveMetaStoreClient";
+ Options catalogOptions = new Options();
+ catalogOptions.set(StatsSetupConst.DO_NOT_UPDATE_STATS, "true");
+ catalogOptions.set(CatalogOptions.WAREHOUSE, warehouse);
+ CatalogContext catalogContext = CatalogContext.create(catalogOptions);
+ FileIO fileIO = FileIO.get(new Path(warehouse), catalogContext);
+ catalog =
+ new HiveCatalog(fileIO, hiveConf, metastoreClientClass,
catalogOptions, warehouse);
+ }
+
+ @Test
+ public void testAlterTable() throws Exception {
+ catalog.createDatabase("test_db", false);
+ // Alter table adds new columns to an existing table, but does not
update stats
+ Identifier identifier = Identifier.create("test_db", "test_table");
+ catalog.createTable(
+ identifier,
+ new Schema(
+ Lists.newArrayList(new DataField(0, "col1",
DataTypes.STRING())),
+ Collections.emptyList(),
+ Collections.emptyList(),
+ Maps.newHashMap(),
+ ""),
+ false);
+ catalog.alterTable(
+ identifier,
+ Lists.newArrayList(
+ SchemaChange.addColumn("col2", DataTypes.DATE()),
+ SchemaChange.addColumn("col3", DataTypes.STRING(),
"col3 field")),
+ false);
+ HiveCatalog hiveCatalog = (HiveCatalog) catalog;
+ Table table = hiveCatalog.getHmsTable(identifier);
+
assertThat(table.getParameters().get("COLUMN_STATS_ACCURATE")).isEqualTo(null);
+ }
+}
diff --git
a/paimon-hive/paimon-hive-connector-2.3/src/test/java/org/apache/paimon/hive/AlterFailHiveMetaStoreClient.java
b/paimon-hive/paimon-hive-connector-2.3/src/test/java/org/apache/paimon/hive/AlterFailHiveMetaStoreClient.java
index ebd4684edf..55e6d74084 100644
---
a/paimon-hive/paimon-hive-connector-2.3/src/test/java/org/apache/paimon/hive/AlterFailHiveMetaStoreClient.java
+++
b/paimon-hive/paimon-hive-connector-2.3/src/test/java/org/apache/paimon/hive/AlterFailHiveMetaStoreClient.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaHookLoader;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;
@@ -51,4 +52,11 @@ public class AlterFailHiveMetaStoreClient extends
HiveMetaStoreClient implements
throws InvalidOperationException, MetaException, TException {
throw new TException();
}
+
+ @Override
+ public void alter_table_with_environmentContext(
+ String defaultDatabaseName, String tblName, Table table,
EnvironmentContext env)
+ throws InvalidOperationException, MetaException, TException {
+ throw new TException();
+ }
}
diff --git
a/paimon-hive/paimon-hive-connector-3.1/src/test/java/org/apache/paimon/hive/AlterFailHiveMetaStoreClient.java
b/paimon-hive/paimon-hive-connector-3.1/src/test/java/org/apache/paimon/hive/AlterFailHiveMetaStoreClient.java
index ae6a1bb85a..eab18feada 100644
---
a/paimon-hive/paimon-hive-connector-3.1/src/test/java/org/apache/paimon/hive/AlterFailHiveMetaStoreClient.java
+++
b/paimon-hive/paimon-hive-connector-3.1/src/test/java/org/apache/paimon/hive/AlterFailHiveMetaStoreClient.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.HiveMetaHookLoader;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;
@@ -51,4 +52,11 @@ public class AlterFailHiveMetaStoreClient extends
HiveMetaStoreClient implements
throws InvalidOperationException, MetaException, TException {
throw new TException();
}
+
+ @Override
+ public void alter_table_with_environmentContext(
+ String defaultDatabaseName, String tblName, Table table,
EnvironmentContext env)
+ throws InvalidOperationException, MetaException, TException {
+ throw new TException();
+ }
}