zratkai commented on code in PR #5628: URL: https://github.com/apache/hive/pull/5628#discussion_r2189911474
##########
iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveIcebergRESTCatalogClientAdapter.java:
##########
@@ -0,0 +1,442 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.hive;
+
+import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.stream.Collectors;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.metastore.HiveMetaHook;
+import org.apache.hadoop.hive.metastore.HiveMetaHookLoader;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
+import org.apache.hadoop.hive.metastore.api.CompactionMetricsDataStruct;
+import org.apache.hadoop.hive.metastore.api.CreateTableRequest;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.DropDatabaseRequest;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.GetTableRequest;
+import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
+import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.api.PrincipalType;
+import org.apache.hadoop.hive.metastore.api.SQLCheckConstraint;
+import org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint;
+import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
+import org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint;
+import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
+import org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint;
+import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.ShowLocksResponse;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.api.UnknownDBException;
+import org.apache.hadoop.hive.metastore.api.WMFullResourcePlan;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.iceberg.BaseTable;
+import org.apache.iceberg.CatalogUtil;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.SortOrder;
+import org.apache.iceberg.TableMetadata;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.exceptions.NoSuchTableException;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.rest.RESTCatalog;
+import org.apache.thrift.TException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class HiveIcebergRESTCatalogClientAdapter implements IMetaStoreClient {
+
+  private static final Logger LOG = LoggerFactory.getLogger(HiveIcebergRESTCatalogClientAdapter.class);
+  public static final String NAMESPACE_SEPARATOR = ".";
+  public static final String NAME = "name";
+  public static final String LOCATION = "location";
+  public static final String ICEBERG_CATALOG_TYPE = "iceberg.catalog.default_iceberg.type";
+  public static final String DB_OWNER = "owner";
+  public static final String DB_OWNER_TYPE = "ownerType";
+  public static final String DEFAULT_INPUT_FORMAT_CLASS = "org.apache.iceberg.mr.hive.HiveIcebergInputFormat";
+  public static final String DEFAULT_OUTPUT_FORMAT_CLASS
+      = "org.apache.iceberg.mr.hive.HiveIcebergOutputFormat";
+  public static final String DEFAULT_SERDE_CLASS = "org.apache.iceberg.mr.hive.HiveIcebergSerDe";
+  public static final String CATALOG_CONFIG_PREFIX = "iceberg.rest-catalog.";
+  public static final String WAREHOUSE = "warehouse";
+  private final Configuration conf;
+  private RESTCatalog restCatalog;
+  private final HiveMetaHookLoader hookLoader;
+
+  private final long maxHiveTablePropertySize;
+
+  public HiveIcebergRESTCatalogClientAdapter(Configuration conf, HiveMetaHookLoader hookLoader) {
+    this.conf = conf;
+    this.hookLoader = hookLoader;
+    this.maxHiveTablePropertySize = conf.getLong(HiveOperationsBase.HIVE_TABLE_PROPERTY_MAX_SIZE,
+        HiveOperationsBase.HIVE_TABLE_PROPERTY_MAX_SIZE_DEFAULT);
+  }
+
+  @Override
+  public void reconnect() {
+    Map<String, String> properties = getCatalogPropertiesFromConf(conf);
+    String catalogName = properties.get(WAREHOUSE);
+    restCatalog = new RESTCatalog();
+    restCatalog.initialize(catalogName, properties);

Review Comment:
   Why not do this in the constructor? Because of:

   1. Separation of concerns: a constructor should only initialize the object's internal state, not perform complex logic such as I/O.
   2. Easier error handling: if the network connection fails inside the constructor, the caller cannot handle the failure cleanly; it is left with a half-constructed object, which leads to brittle code or forced exception handling.

          MyClient client = new MyClient(); // What if the constructor throws IOException?

   3. Testability: classes that do heavy work in their constructors (opening sockets, database connections) are harder to test, mock, or even instantiate in unit tests.
   4. Flexible lifecycle management: by separating the setup logic (init()) from object creation (the constructor), you can retry, delay, or configure the connection after the object has been constructed.

   Why prefer init() or a similar method?
   - You control when the connection happens.
   - It is easier to handle and report errors.
   - It allows dependency injection or configuration before setup.
   - It aligns with the "construct → configure → initialize → use" lifecycle pattern.
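   As a minimal sketch of that lifecycle split (the `RestCatalogClient` class, its `Socket` connection, and the `host`/`port` properties below are purely illustrative, not the PR's actual code): the constructor only records configuration, and the fallible network work lives in a separate `reconnect()` step that the caller invokes, catches, and retries explicitly.

   ```java
   import java.io.IOException;
   import java.net.Socket;
   import java.util.Map;

   // Hypothetical example, not from the PR: constructor assigns fields only,
   // all I/O is deferred to an explicit initialization method.
   public class RestCatalogClient {

     private final Map<String, String> properties;
     private Socket connection; // opened lazily, never in the constructor

     public RestCatalogClient(Map<String, String> properties) {
       this.properties = properties; // store state only; cannot fail
     }

     // Init step: the caller controls when the connection happens, can catch
     // IOException cleanly, and can retry without constructing a new object.
     public void reconnect() throws IOException {
       connection = new Socket(properties.get("host"),
           Integer.parseInt(properties.get("port")));
     }

     public static void main(String[] args) {
       RestCatalogClient client =
           new RestCatalogClient(Map.of("host", "localhost", "port", "8181"));
       try {
         client.reconnect(); // error handling lives here, not hidden inside `new`
       } catch (IOException e) {
         // log, report, or retry; the object itself remains valid for another attempt
       }
     }
   }
   ```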