deniskuzZ commented on code in PR #5995: URL: https://github.com/apache/hive/pull/5995#discussion_r2322374771
########## iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CatalogUtils.java: ########## @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.hive; + +import java.util.Map; +import java.util.Optional; +import java.util.Properties; +import java.util.Set; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.iceberg.BaseMetastoreTableOperations; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; + +public class CatalogUtils { + public static final String NAME = "name"; + public static final String LOCATION = "location"; + public static final String CATALOG_NAME = "iceberg.catalog"; + public static final String CATALOG_CONFIG_PREFIX = "iceberg.catalog."; + public static final String CATALOG_WAREHOUSE_TEMPLATE = "iceberg.catalog.%s.warehouse"; + public static final String CATALOG_IMPL_TEMPLATE = 
"iceberg.catalog.%s.catalog-impl"; + public static final String CATALOG_DEFAULT_CONFIG_PREFIX = "iceberg.catalog-default."; + public static final String ICEBERG_HADOOP_TABLE_NAME = "location_based_table"; + public static final String NO_CATALOG_TYPE = "no catalog"; + public static final Set<String> PROPERTIES_TO_REMOVE = ImmutableSet.of( + // We don't want to push down the metadata location props to Iceberg from HMS, + // since the snapshot pointer in HMS would always be one step ahead + BaseMetastoreTableOperations.METADATA_LOCATION_PROP, + BaseMetastoreTableOperations.PREVIOUS_METADATA_LOCATION_PROP); + + private CatalogUtils() { + + } + + /** + * Calculates the properties we would like to send to the catalog. + * <ul> + * <li>The base of the properties is the properties stored at the Hive Metastore for the given table + * <li>We add the {@link CatalogUtils#LOCATION} as the table location + * <li>We add the {@link CatalogUtils#NAME} as + * TableIdentifier defined by the database name and table name + * <li>We add the serdeProperties of the HMS table + * <li>We remove some parameters that we don't want to push down to the Iceberg table props + * </ul> + * @param hmsTable Table for which we are calculating the properties + * @return The properties we can provide for Iceberg functions + */ + public static Properties getCatalogProperties(org.apache.hadoop.hive.metastore.api.Table hmsTable) { + Properties properties = new Properties(); + properties.putAll(toIcebergProperties(hmsTable.getParameters())); + + if (properties.get(LOCATION) == null && hmsTable.getSd() != null && + hmsTable.getSd().getLocation() != null) { + properties.put(LOCATION, hmsTable.getSd().getLocation()); + } + + if (properties.get(NAME) == null) { + properties.put(NAME, TableIdentifier.of(hmsTable.getDbName(), + hmsTable.getTableName()).toString()); + } + + SerDeInfo serdeInfo = hmsTable.getSd().getSerdeInfo(); + if (serdeInfo != null) { + 
properties.putAll(toIcebergProperties(serdeInfo.getParameters())); + } + + // Remove HMS table parameters we don't want to propagate to Iceberg + PROPERTIES_TO_REMOVE.forEach(properties::remove); + + return properties; + } + + private static Properties toIcebergProperties(Map<String, String> parameters) { + Properties properties = new Properties(); + parameters.entrySet().stream() + .filter(e -> e.getKey() != null && e.getValue() != null) + .forEach(e -> { + String icebergKey = HMSTablePropertyHelper.translateToIcebergProp(e.getKey()); + properties.put(icebergKey, e.getValue()); + }); + return properties; + } + + /** + * Collect all the catalog specific configuration from the global hive configuration. + * @param conf a Hadoop configuration + * @param catalogName name of the catalog + * @return complete map of catalog properties + */ + public static Map<String, String> getCatalogProperties(Configuration conf, String catalogName) { + Map<String, String> catalogProperties = Maps.newHashMap(); + String keyPrefix = CATALOG_CONFIG_PREFIX + catalogName; + conf.forEach(config -> { + if (config.getKey().startsWith(CatalogUtils.CATALOG_DEFAULT_CONFIG_PREFIX)) { + catalogProperties.putIfAbsent( + config.getKey().substring(CatalogUtils.CATALOG_DEFAULT_CONFIG_PREFIX.length()), + config.getValue()); + } else if (config.getKey().startsWith(keyPrefix)) { + catalogProperties.put( + config.getKey().substring(keyPrefix.length() + 1), + config.getValue()); + } + }); + + return catalogProperties; + } + + public static String getCatalogName(Configuration conf) { + return MetastoreConf.getVar(conf, MetastoreConf.ConfVars.CATALOG_DEFAULT); + } + + public static String getCatalogType(Configuration conf) { + return getCatalogType(conf, CatalogUtils.getCatalogName(conf)); + } + + public static String getCatalogType(Configuration conf, Properties catalogProperties) { + String catalogName = catalogProperties.getProperty( + CatalogUtils.CATALOG_NAME, + MetastoreConf.getVar(conf, 
MetastoreConf.ConfVars.CATALOG_DEFAULT)); return getCatalogType(conf, catalogName); } + + public static boolean isCustomHadoopCatalogImpl(Configuration conf, String catalogName) { + String getCatalogImpl = getCatalogImpl(conf, catalogName); + try { + return Class.forName(CatalogUtil.ICEBERG_CATALOG_HADOOP).isAssignableFrom(Class.forName(getCatalogImpl)); Review Comment: Why do we need this? Didn't it work before without this check? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org For additional commands, e-mail: gitbox-help@hive.apache.org