okumin commented on code in PR #6441: URL: https://github.com/apache/hive/pull/6441#discussion_r3226671122
########## standalone-metastore/metastore-rest-catalog/src/main/java/org/apache/iceberg/hive/MetadataLocator.java: ########## @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.iceberg.hive; + +import java.util.Collections; +import java.util.List; + +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.GetProjectionsSpec; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.client.builder.GetTableProjectionsSpecBuilder; +import org.apache.iceberg.BaseMetastoreTableOperations; +import org.apache.iceberg.ClientPool; +import org.apache.iceberg.MetadataTableType; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.thrift.TException; + +/** + * Fetches the location of a given metadata table. + * <p>Since the location mutates with each transaction, this allows determining if a cached version of the + * table is the latest known in the HMS database.</p> + */ +public class MetadataLocator { + private static final org.slf4j.Logger LOGGER = org.slf4j.LoggerFactory.getLogger(MetadataLocator.class); + private static final GetProjectionsSpec PARAM_SPEC = + new GetTableProjectionsSpecBuilder() + .includeParameters() // only fetches table.parameters + .build(); + private final HiveCatalog catalog; + + public MetadataLocator(HiveCatalog catalog) { + this.catalog = catalog; + } + + public HiveCatalog getCatalog() { + return catalog; + } + + /** + * Returns the location of the metadata table identified by the given identifier, or null if the table is + * not a metadata table. + * <p>This uses the Thrift API to fetch the table parameters, which is more efficient than fetching the entire table object.</p> + * @param identifier the identifier of the metadata table to fetch the location for + * @return the location of the metadata table, or null if the table does not exist or is not a metadata table + * @throws NoSuchTableException if the table does not exist + */ + public String getLocation(TableIdentifier identifier) { + final ClientPool<IMetaStoreClient, TException> clients = catalog.clientPool(); + final String catName = catalog.name(); + final TableIdentifier baseTableIdentifier; + if (!catalog.isValidIdentifier(identifier)) { + if (!isValidMetadataIdentifier(identifier)) { + return null; + } else { + baseTableIdentifier = TableIdentifier.of(identifier.namespace().levels()); + } + } else { + baseTableIdentifier = identifier; + } + String database = baseTableIdentifier.namespace().level(0); + String tableName = baseTableIdentifier.name(); + try { + List<Table> tables = + clients.run(client -> client.getTables(catName, database, Collections.singletonList(tableName), PARAM_SPEC)); + if (tables != null && !tables.isEmpty()) { + Table table = tables.getFirst(); + if (table != null) { Review Comment: I guess this is not a view ```suggestion if (table != null) { HiveOperationsBase.validateIcebergViewNotLoadedAsIcebergTable(table, fullName); ``` ########## standalone-metastore/metastore-rest-catalog/src/main/java/org/apache/iceberg/hive/MetadataLocator.java: ########## @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.iceberg.hive; + +import java.util.Collections; +import java.util.List; + +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.GetProjectionsSpec; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.client.builder.GetTableProjectionsSpecBuilder; +import org.apache.iceberg.BaseMetastoreTableOperations; +import org.apache.iceberg.ClientPool; +import org.apache.iceberg.MetadataTableType; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.thrift.TException; + +/** + * Fetches the location of a given metadata table. + * <p>Since the location mutates with each transaction, this allows determining if a cached version of the + * table is the latest known in the HMS database.</p> + */ +public class MetadataLocator { + private static final org.slf4j.Logger LOGGER = org.slf4j.LoggerFactory.getLogger(MetadataLocator.class); + private static final GetProjectionsSpec PARAM_SPEC = + new GetTableProjectionsSpecBuilder() + .includeParameters() // only fetches table.parameters + .build(); + private final HiveCatalog catalog; + + public MetadataLocator(HiveCatalog catalog) { + this.catalog = catalog; + } + + public HiveCatalog getCatalog() { + return catalog; + } + + /** + * Returns the location of the metadata table identified by the given identifier, or null if the table is + * not a metadata table. + * <p>This uses the Thrift API to fetch the table parameters, which is more efficient than fetching the entire table object.</p> + * @param identifier the identifier of the metadata table to fetch the location for + * @return the location of the metadata table, or null if the table does not exist or is not a metadata table + * @throws NoSuchTableException if the table does not exist + */ + public String getLocation(TableIdentifier identifier) { + final ClientPool<IMetaStoreClient, TException> clients = catalog.clientPool(); + final String catName = catalog.name(); + final TableIdentifier baseTableIdentifier; + if (!catalog.isValidIdentifier(identifier)) { + if (!isValidMetadataIdentifier(identifier)) { + return null; Review Comment: If we follow the semantics of loadTable, `NoSuchTableException` ########## standalone-metastore/metastore-rest-catalog/src/main/java/org/apache/iceberg/rest/HMSCachingCatalog.java: ########## @@ -33,65 +51,423 @@ import org.apache.iceberg.exceptions.NamespaceNotEmptyException; import org.apache.iceberg.exceptions.NoSuchNamespaceException; import org.apache.iceberg.hive.HiveCatalog; +import org.apache.iceberg.hive.MetadataLocator; import org.apache.iceberg.view.View; import org.apache.iceberg.view.ViewBuilder; +import org.jetbrains.annotations.TestOnly; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import com.github.benmanes.caffeine.cache.Ticker; /** * Class that wraps an Iceberg Catalog to cache tables. */ -public class HMSCachingCatalog extends CachingCatalog implements SupportsNamespaces, ViewCatalog { +public class HMSCachingCatalog extends CachingCatalog + implements SupportsNamespaces, ViewCatalog, HMSCachingCatalogMXBean, Closeable { + protected static final Logger LOG = LoggerFactory.getLogger(HMSCachingCatalog.class); + + @TestOnly + private static SoftReference<HMSCachingCatalog> cacheRef = new SoftReference<>(null); + + @TestOnly + @SuppressWarnings("unchecked") + public static <C extends Catalog> C getLatestCache(Function<HMSCachingCatalog, C> extractor) { + HMSCachingCatalog cache = cacheRef.get(); + if (cache == null) { + return null; + } + return extractor == null ? (C) cache : extractor.apply(cache); + } + + @TestOnly + public HiveCatalog getCatalog() { + return hiveCatalog; + } + + // The underlying HiveCatalog instance. private final HiveCatalog hiveCatalog; - - public HMSCachingCatalog(HiveCatalog catalog, long expiration) { - super(catalog, true, expiration, Ticker.systemTicker()); + // Duplicate because CachingCatalog doesn't expose the case sensitivity of the underlying catalog, + // which is needed for canonicalizing identifiers before caching. + private final boolean caseSensitive; + // The locator. + private final MetadataLocator metadataLocator; + // An L1 small latency cache. + // This is used to cache the last cached time for each table identifier, + // so that we can skip location check for repeated access to the same table within a short period of time, + // which can significantly reduce the latency for repeated access to the same table. + private final Map<TableIdentifier, Long> l1Cache; + // The TTL for L1 cache (3s). + private final int l1Ttl; + // The L1 cache size. + private final int l1CacheSize; + + // Metrics counters. + private final AtomicLong cacheHitCount = new AtomicLong(0); + private final AtomicLong cacheMissCount = new AtomicLong(0); + private final AtomicLong cacheLoadCount = new AtomicLong(0); + private final AtomicLong cacheInvalidateCount = new AtomicLong(0); + private final AtomicLong cacheMetaLoadCount = new AtomicLong(0); + // L1 cache metrics: counted only when the L2 (Caffeine) cache already has the entry. + private final AtomicLong l1CacheHitCount = new AtomicLong(0); + private final AtomicLong l1CacheMissCount = new AtomicLong(0); + + // JMX ObjectName under which this instance is registered (may be null if registration failed). + private ObjectName jmxObjectName; + + public HMSCachingCatalog(HiveCatalog catalog, long expirationMs) { + this(catalog, expirationMs, /*caseSensitive*/ true); + } + + public HMSCachingCatalog(HiveCatalog catalog, long expirationMs, boolean caseSensitive) { + super(catalog, caseSensitive, expirationMs, Ticker.systemTicker()); this.hiveCatalog = catalog; + this.caseSensitive = caseSensitive; + this.metadataLocator = new MetadataLocator(catalog); + Configuration conf = catalog.getConf(); + if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_IN_TEST)) { + // Only keep a reference to the latest cache for testing purpose, so that tests can manipulate the catalog. + cacheRef = new SoftReference<>(this); + } + int l1size = conf.getInt("hms.caching.catalog.l1.cache.size", 32); + int l1ttl = conf.getInt("hms.caching.catalog.l1.cache.ttl", 3_000); + if (l1size > 0 && l1ttl > 0) { + l1Cache = Collections.synchronizedMap(new LinkedHashMap<TableIdentifier, Long>() { + @Override + protected boolean removeEldestEntry(Map.Entry<TableIdentifier, Long> eldest) { + return size() > l1CacheSize; + } + }); + l1Ttl = l1ttl; + l1CacheSize = l1size; + } else { + l1Cache = Collections.emptyMap(); + l1Ttl = 0; + l1CacheSize = 0; + } + registerJmx(catalog.name()); + } + + /** + * Registers this instance as a JMX MBean. + * + * @param catalogName the catalog name, used to build the {@link ObjectName} + */ + private void registerJmx(String catalogName) { + try { + MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); + String sanitized = catalogName == null || catalogName.isEmpty() + ? "default" + : catalogName.replaceAll("[^a-zA-Z0-9.\\\\-]", "_"); + ObjectName name = new ObjectName("org.apache.iceberg.rest:type=HMSCachingCatalog,name=" + sanitized); + if (mbs.isRegistered(name)) { + mbs.unregisterMBean(name); + } + mbs.registerMBean(this, name); + this.jmxObjectName = name; + LOG.info("Registered JMX MBean: {}", name); + } catch (JMException e) { + LOG.warn("Failed to register JMX MBean for HMSCachingCatalog", e); + } + } + + /** + * Callback when cache invalidates the entry for a given table identifier. + * + * @param tid the table identifier to invalidate + */ + protected void onCacheInvalidate(TableIdentifier tid) { Review Comment: Or we may not need a method ########## standalone-metastore/metastore-rest-catalog/src/main/java/org/apache/iceberg/hive/MetadataLocator.java: ########## @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.iceberg.hive; + +import java.util.Collections; +import java.util.List; + +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.GetProjectionsSpec; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.client.builder.GetTableProjectionsSpecBuilder; +import org.apache.iceberg.BaseMetastoreTableOperations; +import org.apache.iceberg.ClientPool; +import org.apache.iceberg.MetadataTableType; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.thrift.TException; + +/** + * Fetches the location of a given metadata table. + * <p>Since the location mutates with each transaction, this allows determining if a cached version of the + * table is the latest known in the HMS database.</p> + */ +public class MetadataLocator { + private static final org.slf4j.Logger LOGGER = org.slf4j.LoggerFactory.getLogger(MetadataLocator.class); + private static final GetProjectionsSpec PARAM_SPEC = + new GetTableProjectionsSpecBuilder() + .includeParameters() // only fetches table.parameters + .build(); + private final HiveCatalog catalog; + + public MetadataLocator(HiveCatalog catalog) { + this.catalog = catalog; + } + + public HiveCatalog getCatalog() { + return catalog; + } + + /** + * Returns the location of the metadata table identified by the given identifier, or null if the table is + * not a metadata table. + * <p>This uses the Thrift API to fetch the table parameters, which is more efficient than fetching the entire table object.</p> + * @param identifier the identifier of the metadata table to fetch the location for + * @return the location of the metadata table, or null if the table does not exist or is not a metadata table + * @throws NoSuchTableException if the table does not exist + */ + public String getLocation(TableIdentifier identifier) { + final ClientPool<IMetaStoreClient, TException> clients = catalog.clientPool(); + final String catName = catalog.name(); + final TableIdentifier baseTableIdentifier; + if (!catalog.isValidIdentifier(identifier)) { + if (!isValidMetadataIdentifier(identifier)) { + return null; + } else { + baseTableIdentifier = TableIdentifier.of(identifier.namespace().levels()); + } + } else { + baseTableIdentifier = identifier; + } + String database = baseTableIdentifier.namespace().level(0); + String tableName = baseTableIdentifier.name(); + try { + List<Table> tables = + clients.run(client -> client.getTables(catName, database, Collections.singletonList(tableName), PARAM_SPEC)); + if (tables != null && !tables.isEmpty()) { + Table table = tables.getFirst(); + if (table != null) { + HiveOperationsBase.validateTableIsIceberg(table, tableName); + return table.getParameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP); + } + } + return null; Review Comment: Probably, NoSuchTableException -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
