ayushtkn commented on code in PR #6441:
URL: https://github.com/apache/hive/pull/6441#discussion_r3159001950
##########
standalone-metastore/metastore-rest-catalog/src/main/java/org/apache/iceberg/hive/MetadataLocator.java:
##########
@@ -0,0 +1,101 @@
+/*
Review Comment:
The ASF license header here differs from the one used in the other files. Can you
copy it from an existing file? Even Sonar complains about it.
##########
standalone-metastore/metastore-rest-catalog/src/main/java/org/apache/iceberg/rest/HMSCachingCatalog.java:
##########
@@ -20,78 +20,276 @@
package org.apache.iceberg.rest;
import com.github.benmanes.caffeine.cache.Ticker;
+
+import java.lang.ref.SoftReference;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.Function;
+
+import org.apache.iceberg.BaseMetadataTable;
import org.apache.iceberg.CachingCatalog;
+import org.apache.iceberg.HasTableOperations;
+import org.apache.iceberg.MetadataTableType;
+import org.apache.iceberg.MetadataTableUtils;
import org.apache.iceberg.Schema;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.TableOperations;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.SupportsNamespaces;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.catalog.ViewCatalog;
import org.apache.iceberg.exceptions.NamespaceNotEmptyException;
import org.apache.iceberg.exceptions.NoSuchNamespaceException;
+import org.apache.iceberg.exceptions.NoSuchTableException;
import org.apache.iceberg.hive.HiveCatalog;
+import org.apache.iceberg.hive.MetadataLocator;
import org.apache.iceberg.view.View;
import org.apache.iceberg.view.ViewBuilder;
-
+import org.jetbrains.annotations.TestOnly;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Class that wraps an Iceberg Catalog to cache tables.
*/
public class HMSCachingCatalog extends CachingCatalog implements
SupportsNamespaces, ViewCatalog {
+ protected static final Logger LOG =
LoggerFactory.getLogger(HMSCachingCatalog.class);
+
+ private static SoftReference<HMSCachingCatalog> cacheRef = new
SoftReference<>(null);
+ @TestOnly
+ public static <C extends Catalog> C
getLatestCache(Function<HMSCachingCatalog, C> extractor) {
+ HMSCachingCatalog cache = cacheRef.get();
+ if (cache == null) {
+ return null;
+ }
+ return extractor == null ? (C) cache : extractor.apply(cache);
+ }
+
+ @TestOnly
+ public HiveCatalog getCatalog() {
+ return hiveCatalog;
+ }
+
private final HiveCatalog hiveCatalog;
-
- public HMSCachingCatalog(HiveCatalog catalog, long expiration) {
- super(catalog, true, expiration, Ticker.systemTicker());
+ // Metrics counters
+ private final AtomicLong cacheHitCount = new AtomicLong(0);
+ private final AtomicLong cacheMissCount = new AtomicLong(0);
+ private final AtomicLong cacheLoadCount = new AtomicLong(0);
+ private final AtomicLong cacheInvalidateCount = new AtomicLong(0);
+ private final AtomicLong cacheMetaLoadCount = new AtomicLong(0);
+
+ public HMSCachingCatalog(HiveCatalog catalog, long expirationMs) {
+ this(catalog, expirationMs, /*caseSensitive*/ true);
+ }
+
+ public HMSCachingCatalog(HiveCatalog catalog, long expirationMs, boolean
caseSensitive) {
+ super(catalog, caseSensitive, expirationMs, Ticker.systemTicker());
this.hiveCatalog = catalog;
+ if (catalog.getConf().getBoolean("metastore.iceberg.catalog.cache.debug",
false)) {
Review Comment:
The config `metastore.iceberg.catalog.cache.debug` seems redundant. Can you
use `HiveConf.ConfVars.HIVE_IN_TEST` instead? I don't think we can convince
people to accept a separate test config per use case :-)
##########
standalone-metastore/metastore-rest-catalog/src/main/java/org/apache/iceberg/hive/MetadataLocator.java:
##########
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.hive;
+
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.GetProjectionsSpec;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.api.Table;
+import
org.apache.hadoop.hive.metastore.client.builder.GetTableProjectionsSpecBuilder;
+import org.apache.iceberg.BaseMetastoreTableOperations;
+import org.apache.iceberg.ClientPool;
+import org.apache.iceberg.MetadataTableType;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.exceptions.NoSuchTableException;
+import org.apache.thrift.TException;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Fetches the location of a given metadata table.
+ * <p>Since the location mutates with each transaction, this allows
determining if a cached version of the
+ * table is the latest known in the HMS database.</p>
+ */
+public class MetadataLocator {
+ private static final org.slf4j.Logger LOGGER =
org.slf4j.LoggerFactory.getLogger(MetadataLocator.class);
+ static final GetProjectionsSpec PARAM_SPEC = new
GetTableProjectionsSpecBuilder()
+ .includeParameters() // only fetches table.parameters
+ .build();
+ private final HiveCatalog catalog;
+
+ public MetadataLocator(HiveCatalog catalog) {
+ this.catalog = catalog;
+ }
+
+ public HiveCatalog getCatalog() {
+ return catalog;
+ }
+
+ /**
+ * Returns the location of the metadata table identified by the given
identifier, or null if the table does not exist or is
+ * not a metadata table.
+ * <p>This uses the Thrift API to fetch the table parameters, which is more
efficient than fetching the entire table object.</p>
+ * @param identifier the identifier of the metadata table to fetch the
location for
+ * @return the location of the metadata table, or null if the table does not
exist or is not a metadata table
+ */
+ public String getLocation(TableIdentifier identifier) {
+ final ClientPool<IMetaStoreClient, TException> clients =
catalog.clientPool();
+ final String catName = catalog.name();
+ final TableIdentifier baseTableIdentifier;
+ if (!catalog.isValidIdentifier(identifier)) {
+ if (!isValidMetadataIdentifier(identifier)) {
+ return null;
+ } else {
+ baseTableIdentifier =
TableIdentifier.of(identifier.namespace().levels());
+ }
+ } else {
+ baseTableIdentifier = identifier;
+ }
+ String database = baseTableIdentifier.namespace().level(0);
+ String tableName = baseTableIdentifier.name();
+ try {
+ List<Table> tables = clients.run(
+ client -> client.getTables(catName, database,
Collections.singletonList(tableName), PARAM_SPEC)
+ );
+ return tables == null || tables.isEmpty()
+ ? null
+ :
tables.getFirst().getParameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP);
+ } catch (NoSuchTableException | NoSuchObjectException e) {
+ LOGGER.info("Table not found {}", baseTableIdentifier, e);
+ } catch (TException e) {
+ LOGGER.info("Table parameters fetch failed {}", baseTableIdentifier, e);
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ LOGGER.error("Interrupted in call to check table existence of {}",
baseTableIdentifier, e);
+ }
+ return null;
+ }
Review Comment:
I think we should return `null` only in the `(NoSuchTableException |
NoSuchObjectException e)` case. The caller method treats `null` as meaning the
table doesn't exist (or similar), whereas a `TException` can be caused by any
other HMS-related issue; in that case we should fail rather than return the
stale copy.
##########
standalone-metastore/metastore-rest-catalog/src/test/java/org/apache/iceberg/rest/TestHMSCachingCatalogStats.java:
##########
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.rest;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import org.apache.hadoop.hive.metastore.ServletSecurity.AuthType;
+import org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.DataFiles;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.catalog.Catalog;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.hive.HiveCatalog;
+import org.apache.iceberg.rest.extension.HiveRESTCatalogServerExtension;
+import org.apache.iceberg.rest.responses.HMSCacheStatsResponse;
+import org.junit.experimental.categories.Category;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInstance;
+import org.junit.jupiter.api.extension.RegisterExtension;
+
+/**
+ * Integration tests that verify the {@link HMSCachingCatalog}
cache-statistics counters
+ * (hit, miss, load, hit-rate) are updated correctly and exposed accurately
via the
+ * {@code GET v1/cache/stats} REST endpoint.
+ *
+ * <p>The server is started with {@link AuthType#NONE} so the tests focus
purely on
+ * caching behaviour without any authentication noise.
+ */
+@Category(MetastoreCheckinTest.class)
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+class TestHMSCachingCatalogStats {
+
+ /** 5 minutes expressed in milliseconds – the value injected into {@code
ICEBERG_CATALOG_CACHE_EXPIRY}. */
+ private static final long CACHE_EXPIRY_MS = 5 * 60 * 1_000L;
+
+ @RegisterExtension
+ private static final HiveRESTCatalogServerExtension REST_CATALOG_EXTENSION =
+ HiveRESTCatalogServerExtension.builder(AuthType.NONE)
+ // Without a positive expiry the HMSCatalogFactory skips
HMSCachingCatalog entirely.
+ .configure(
+ MetastoreConf.ConfVars.ICEBERG_CATALOG_CACHE_EXPIRY.getVarname(),
+ String.valueOf(CACHE_EXPIRY_MS))
+ .configure("metastore.iceberg.catalog.cache.debug", "true")
+ .build();
+
+ private RESTCatalog catalog;
+ private HiveCatalog serverCatalog;
+
+ @BeforeAll
+ void setupAll() {
+ catalog = RCKUtils.initCatalogClient(clientConfig());
+ serverCatalog =
HMSCachingCatalog.getLatestCache(HMSCachingCatalog::getCatalog);
+ }
+
+ /** Remove any namespace/table created by the test so each run starts clean.
*/
+ @AfterEach
+ void cleanup() {
+ RCKUtils.purgeCatalogTestEntries(catalog);
+ }
+
+ //
---------------------------------------------------------------------------
+ // helpers
+ //
---------------------------------------------------------------------------
+
+ private java.util.Map<String, String> clientConfig() {
+ return java.util.Map.of("uri", REST_CATALOG_EXTENSION.getRestEndpoint());
+ }
+
+ /**
+ * Calls the {@code GET v1/cache/stats} endpoint directly over HTTP and
returns
+ * the deserialised {@link HMSCacheStatsResponse}.
+ */
+ private HMSCacheStatsResponse fetchCacheStats() throws Exception {
+ String statsUrl = REST_CATALOG_EXTENSION.getRestEndpoint() +
"/v1/cache/stats";
+ HttpRequest request = HttpRequest.newBuilder()
+ .uri(URI.create(statsUrl))
+ .GET()
+ .build();
+ HttpResponse<String> response;
+ try (HttpClient client = HttpClient.newHttpClient()) {
+ response = client.send(request, HttpResponse.BodyHandlers.ofString());
+ }
+ Assertions.assertEquals(200, response.statusCode(),
+ "Expected HTTP 200 from cache stats endpoint, got: " +
response.statusCode());
+ return new ObjectMapper().readValue(response.body(),
HMSCacheStatsResponse.class);
+ }
+
+
+ /**
+ * Verifies that the {@link HMSCachingCatalog} correctly tracks cache hits,
misses and
+ * loads, and that those counters are accurately returned via the REST
endpoint.
+ *
+ * <p>Strategy:
+ * <ol>
+ * <li>Snapshot baseline stats before any operations so the test is
isolated from
+ * cumulative counters left by previous tests.</li>
+ * <li>Create a namespace and a table (bypasses the cache – done via
+ * {@link org.apache.iceberg.hive.HiveCatalog} directly).</li>
+ * <li>First {@code loadTable} call → cache miss + actual load.</li>
+ * <li>Second and third {@code loadTable} calls → cache hits (metadata
location
+ * has not changed, so the cached entry is still valid).</li>
+ * <li>Fetch stats again and assert the deltas against the baseline.</li>
+ * </ol>
+ */
+ @Test
+ void testCacheCountersAreUpdated() throws Exception {
+ // -- baseline
---------------------------------------------------------------
+ HMSCacheStatsResponse baseline = fetchCacheStats();
+ long baseHit = baseline.stats().getOrDefault("hit", 0L).longValue();
+ long baseMiss = baseline.stats().getOrDefault("miss", 0L).longValue();
+ long baseLoad = baseline.stats().getOrDefault("load", 0L).longValue();
+
+ // -- exercise the cache
-----------------------------------------------------
+ var db = Namespace.of("caching_stats_test_db");
+ var tableId = TableIdentifier.of(db, "caching_stats_test_table");
+
+ catalog.createNamespace(db);
+ catalog.createTable(tableId, new Schema());
+
+ // First load → cache miss + load
+ catalog.loadTable(tableId);
+ // Second load → cache hit (metadata location unchanged)
+ catalog.loadTable(tableId);
+ // Third load → cache hit
+ catalog.loadTable(tableId);
+
+ // Mutate the table by appending a data file – this creates a new snapshot
+ // which advances METADATA_LOCATION in HMS, so the next loadTable call
through
+ // the caching catalog will detect the stale cached location and
invalidate it.
+ Table table = serverCatalog.loadTable(tableId);
+ DataFile dataFile = DataFiles.builder(PartitionSpec.unpartitioned())
+ .withPath(table.location() + "/data/fake-0.parquet")
+ .withFileSizeInBytes(1024)
+ .withRecordCount(1)
+ .build();
+ table.newAppend()
+ .appendFile(dataFile)
+ .commit();
+
+ long baseInvalidate = fetchCacheStats().stats().getOrDefault("invalidate",
0L).longValue();
+
+ // Fourth load → cache invalidation + load (cached location != HMS
location)
+ catalog.loadTable(tableId);
+
+ // -- fetch updated stats via the REST endpoint
------------------------------
+ HMSCacheStatsResponse after = fetchCacheStats();
+ long deltaHit = after.stats().getOrDefault("hit",
0L).longValue() - baseHit;
+ long deltaMiss = after.stats().getOrDefault("miss",
0L).longValue() - baseMiss;
+ long deltaLoad = after.stats().getOrDefault("load",
0L).longValue() - baseLoad;
+ long deltaInvalidate = after.stats().getOrDefault("invalidate",
0L).longValue() - baseInvalidate;
+
+ // -- assertions
-------------------------------------------------------------
+ Assertions.assertTrue(deltaMiss >= 1,
+ "Expected at least 1 cache miss (first loadTable), but delta was: " +
deltaMiss);
+ Assertions.assertTrue(deltaLoad >= 2,
+ "Expected at least 2 cache loads (initial load + post-invalidation
reload), but delta was: " + deltaLoad);
+ Assertions.assertTrue(deltaHit >= 2,
+ "Expected at least 2 cache hits (second + third loadTable), but delta
was: " + deltaHit);
+ Assertions.assertTrue(deltaInvalidate >= 1,
+ "Expected at least 1 cache invalidation (metadata location changed
after table update), but delta was: " + deltaInvalidate);
+
+ // hit-rate must be a valid ratio in [0.0, 1.0]
+ double hitRate = after.stats().getOrDefault("hit-rate", 0.0).doubleValue();
+ Assertions.assertTrue(hitRate >= 0.0 && hitRate <= 1.0,
+ "hit-rate must be in [0.0, 1.0] but was: " + hitRate);
Review Comment:
The test also passes when the hit-rate is 0.0, and since the default is 0.0 as
well, it would pass in that case too. I don't think this is actually testing
the hit-rate properly.
##########
standalone-metastore/metastore-rest-catalog/src/main/java/org/apache/iceberg/hive/MetadataLocator.java:
##########
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.hive;
+
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.GetProjectionsSpec;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.api.Table;
+import
org.apache.hadoop.hive.metastore.client.builder.GetTableProjectionsSpecBuilder;
+import org.apache.iceberg.BaseMetastoreTableOperations;
+import org.apache.iceberg.ClientPool;
+import org.apache.iceberg.MetadataTableType;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.exceptions.NoSuchTableException;
+import org.apache.thrift.TException;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Fetches the location of a given metadata table.
+ * <p>Since the location mutates with each transaction, this allows
determining if a cached version of the
+ * table is the latest known in the HMS database.</p>
+ */
+public class MetadataLocator {
+ private static final org.slf4j.Logger LOGGER =
org.slf4j.LoggerFactory.getLogger(MetadataLocator.class);
+ static final GetProjectionsSpec PARAM_SPEC = new
GetTableProjectionsSpecBuilder()
+ .includeParameters() // only fetches table.parameters
+ .build();
+ private final HiveCatalog catalog;
+
+ public MetadataLocator(HiveCatalog catalog) {
+ this.catalog = catalog;
+ }
+
+ public HiveCatalog getCatalog() {
+ return catalog;
+ }
+
+ /**
+ * Returns the location of the metadata table identified by the given
identifier, or null if the table does not exist or is
+ * not a metadata table.
+ * <p>This uses the Thrift API to fetch the table parameters, which is more
efficient than fetching the entire table object.</p>
+ * @param identifier the identifier of the metadata table to fetch the
location for
+ * @return the location of the metadata table, or null if the table does not
exist or is not a metadata table
+ */
+ public String getLocation(TableIdentifier identifier) {
+ final ClientPool<IMetaStoreClient, TException> clients =
catalog.clientPool();
+ final String catName = catalog.name();
+ final TableIdentifier baseTableIdentifier;
+ if (!catalog.isValidIdentifier(identifier)) {
+ if (!isValidMetadataIdentifier(identifier)) {
+ return null;
+ } else {
+ baseTableIdentifier =
TableIdentifier.of(identifier.namespace().levels());
+ }
+ } else {
+ baseTableIdentifier = identifier;
+ }
+ String database = baseTableIdentifier.namespace().level(0);
+ String tableName = baseTableIdentifier.name();
+ try {
+ List<Table> tables = clients.run(
+ client -> client.getTables(catName, database,
Collections.singletonList(tableName), PARAM_SPEC)
+ );
+ return tables == null || tables.isEmpty()
+ ? null
+ :
tables.getFirst().getParameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP);
+ } catch (NoSuchTableException | NoSuchObjectException e) {
+ LOGGER.info("Table not found {}", baseTableIdentifier, e);
Review Comment:
This could be logged at debug level; it is a routine case: the table simply isn't there.
##########
standalone-metastore/metastore-rest-catalog/src/main/java/org/apache/iceberg/rest/HMSCachingCatalog.java:
##########
@@ -20,78 +20,276 @@
package org.apache.iceberg.rest;
import com.github.benmanes.caffeine.cache.Ticker;
+
+import java.lang.ref.SoftReference;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.Function;
+
+import org.apache.iceberg.BaseMetadataTable;
import org.apache.iceberg.CachingCatalog;
+import org.apache.iceberg.HasTableOperations;
+import org.apache.iceberg.MetadataTableType;
+import org.apache.iceberg.MetadataTableUtils;
import org.apache.iceberg.Schema;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.TableOperations;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.SupportsNamespaces;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.catalog.ViewCatalog;
import org.apache.iceberg.exceptions.NamespaceNotEmptyException;
import org.apache.iceberg.exceptions.NoSuchNamespaceException;
+import org.apache.iceberg.exceptions.NoSuchTableException;
import org.apache.iceberg.hive.HiveCatalog;
+import org.apache.iceberg.hive.MetadataLocator;
import org.apache.iceberg.view.View;
import org.apache.iceberg.view.ViewBuilder;
-
+import org.jetbrains.annotations.TestOnly;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Class that wraps an Iceberg Catalog to cache tables.
*/
public class HMSCachingCatalog extends CachingCatalog implements
SupportsNamespaces, ViewCatalog {
+ protected static final Logger LOG =
LoggerFactory.getLogger(HMSCachingCatalog.class);
+
+ private static SoftReference<HMSCachingCatalog> cacheRef = new
SoftReference<>(null);
+ @TestOnly
+ public static <C extends Catalog> C
getLatestCache(Function<HMSCachingCatalog, C> extractor) {
+ HMSCachingCatalog cache = cacheRef.get();
+ if (cache == null) {
+ return null;
+ }
+ return extractor == null ? (C) cache : extractor.apply(cache);
+ }
+
+ @TestOnly
+ public HiveCatalog getCatalog() {
+ return hiveCatalog;
+ }
+
private final HiveCatalog hiveCatalog;
-
- public HMSCachingCatalog(HiveCatalog catalog, long expiration) {
- super(catalog, true, expiration, Ticker.systemTicker());
+ // Metrics counters
+ private final AtomicLong cacheHitCount = new AtomicLong(0);
+ private final AtomicLong cacheMissCount = new AtomicLong(0);
+ private final AtomicLong cacheLoadCount = new AtomicLong(0);
+ private final AtomicLong cacheInvalidateCount = new AtomicLong(0);
+ private final AtomicLong cacheMetaLoadCount = new AtomicLong(0);
+
+ public HMSCachingCatalog(HiveCatalog catalog, long expirationMs) {
+ this(catalog, expirationMs, /*caseSensitive*/ true);
+ }
+
+ public HMSCachingCatalog(HiveCatalog catalog, long expirationMs, boolean
caseSensitive) {
+ super(catalog, caseSensitive, expirationMs, Ticker.systemTicker());
this.hiveCatalog = catalog;
+ if (catalog.getConf().getBoolean("metastore.iceberg.catalog.cache.debug",
false)) {
+ cacheRef = new SoftReference<>(this);
+ }
+ }
+
+ /**
+ * Callback when cache invalidates the entry for a given table identifier.
+ *
+ * @param tid the table identifier to invalidate
+ */
+ protected void onCacheInvalidate(TableIdentifier tid) {
+ cacheInvalidateCount.incrementAndGet();
+ LOG.debug("Cache invalidate {}: {}", tid, cacheInvalidateCount.get());
+ }
+
+ /**
+ * Callback when cache loads a table for a given table identifier.
+ *
+ * @param tid the table identifier
+ */
+ protected void onCacheLoad(TableIdentifier tid) {
+ cacheLoadCount.incrementAndGet();
+ LOG.debug("Cache load {}: {}", tid, cacheLoadCount.get());
+ }
+
+ /**
+ * Callback when cache hit for a given table identifier.
+ *
+ * @param tid the table identifier
+ */
+ protected void onCacheHit(TableIdentifier tid) {
+ cacheHitCount.incrementAndGet();
+ LOG.debug("Cache hit {} : {}", tid, cacheHitCount.get());
+ }
+
+ /**
+ * Callback when cache miss occurs for a given table identifier.
+ *
+ * @param tid the table identifier
+ */
+ protected void onCacheMiss(TableIdentifier tid) {
+ cacheMissCount.incrementAndGet();
+ LOG.debug("Cache miss {}: {}", tid, cacheMissCount.get());
+ }
+
+ /**
+ * Callback when cache loads a metadata table for a given table identifier.
+ *
+ * @param tid the table identifier
+ */
+ protected void onCacheMetaLoad(TableIdentifier tid) {
+ cacheMetaLoadCount.incrementAndGet();
+ LOG.debug("Cache meta-load {}: {}", tid, cacheMetaLoadCount.get());
+ }
+
+ // Getter methods for accessing metrics
+ public long getCacheHitCount() {
+ return cacheHitCount.get();
}
+ public long getCacheMissCount() {
+ return cacheMissCount.get();
+ }
+
+ public long getCacheLoadCount() {
+ return cacheLoadCount.get();
+ }
+
+ public long getCacheInvalidateCount() {
+ return cacheInvalidateCount.get();
+ }
+
+ public long getCacheMetaLoadCount() {
+ return cacheMetaLoadCount.get();
+ }
+
+ public double getCacheHitRate() {
+ long hits = cacheHitCount.get();
+ long total = hits + cacheMissCount.get();
+ return total == 0 ? 0.0 : (double) hits / total;
+ }
+
+ /**
+ * Generates a map of this cache's performance metrics, including hit count,
+ * miss count, load count, invalidate count, meta-load count, and hit rate.
+ * This can be used for monitoring and debugging purposes to understand the
effectiveness of the cache.
+ * @return a map of cache performance metrics
+ */
+ public Map<String, Number> cacheStats() {
+ return Map.of(
+ "hit", getCacheHitCount(),
+ "miss", getCacheMissCount(),
+ "load", getCacheLoadCount(),
+ "invalidate", getCacheInvalidateCount(),
+ "metaload", getCacheMetaLoadCount(),
+ "hit-rate", getCacheHitRate()
+ );
+ }
+
+
@Override
- public Catalog.TableBuilder buildTable(TableIdentifier identifier, Schema
schema) {
- return hiveCatalog.buildTable(identifier, schema);
+ public void createNamespace(Namespace namespace, Map<String, String> map) {
+ hiveCatalog.createNamespace(namespace, map);
}
@Override
- public void createNamespace(Namespace nmspc, Map<String, String> map) {
- hiveCatalog.createNamespace(nmspc, map);
+ public List<Namespace> listNamespaces(Namespace namespace) throws
NoSuchNamespaceException {
+ return hiveCatalog.listNamespaces(namespace);
}
@Override
- public List<Namespace> listNamespaces(Namespace nmspc) throws
NoSuchNamespaceException {
- return hiveCatalog.listNamespaces(nmspc);
+ public Table loadTable(final TableIdentifier identifier) {
+ final TableIdentifier canonicalized = identifier.toLowerCase();
+ final Table cachedTable = tableCache.getIfPresent(canonicalized);
+ if (cachedTable != null) {
+ final String location = new
MetadataLocator(hiveCatalog).getLocation(canonicalized);
Review Comment:
Why don't we store the `new MetadataLocator(hiveCatalog)` instance in a field, like
`hiveCatalog`, rather than creating a new one on every call?
##########
standalone-metastore/metastore-rest-catalog/src/main/java/org/apache/iceberg/rest/HMSCachingCatalog.java:
##########
@@ -20,78 +20,276 @@
package org.apache.iceberg.rest;
import com.github.benmanes.caffeine.cache.Ticker;
+
+import java.lang.ref.SoftReference;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.Function;
+
+import org.apache.iceberg.BaseMetadataTable;
import org.apache.iceberg.CachingCatalog;
+import org.apache.iceberg.HasTableOperations;
+import org.apache.iceberg.MetadataTableType;
+import org.apache.iceberg.MetadataTableUtils;
import org.apache.iceberg.Schema;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.TableOperations;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.SupportsNamespaces;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.catalog.ViewCatalog;
import org.apache.iceberg.exceptions.NamespaceNotEmptyException;
import org.apache.iceberg.exceptions.NoSuchNamespaceException;
+import org.apache.iceberg.exceptions.NoSuchTableException;
import org.apache.iceberg.hive.HiveCatalog;
+import org.apache.iceberg.hive.MetadataLocator;
import org.apache.iceberg.view.View;
import org.apache.iceberg.view.ViewBuilder;
-
+import org.jetbrains.annotations.TestOnly;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Class that wraps an Iceberg Catalog to cache tables.
*/
public class HMSCachingCatalog extends CachingCatalog implements
SupportsNamespaces, ViewCatalog {
+ protected static final Logger LOG =
LoggerFactory.getLogger(HMSCachingCatalog.class);
+
+ private static SoftReference<HMSCachingCatalog> cacheRef = new
SoftReference<>(null);
+ @TestOnly
+ public static <C extends Catalog> C
getLatestCache(Function<HMSCachingCatalog, C> extractor) {
+ HMSCachingCatalog cache = cacheRef.get();
+ if (cache == null) {
+ return null;
+ }
+ return extractor == null ? (C) cache : extractor.apply(cache);
+ }
+
+ @TestOnly
+ public HiveCatalog getCatalog() {
+ return hiveCatalog;
+ }
+
private final HiveCatalog hiveCatalog;
-
- public HMSCachingCatalog(HiveCatalog catalog, long expiration) {
- super(catalog, true, expiration, Ticker.systemTicker());
+ // Metrics counters
+ private final AtomicLong cacheHitCount = new AtomicLong(0);
+ private final AtomicLong cacheMissCount = new AtomicLong(0);
+ private final AtomicLong cacheLoadCount = new AtomicLong(0);
+ private final AtomicLong cacheInvalidateCount = new AtomicLong(0);
+ private final AtomicLong cacheMetaLoadCount = new AtomicLong(0);
+
+ public HMSCachingCatalog(HiveCatalog catalog, long expirationMs) {
+ this(catalog, expirationMs, /*caseSensitive*/ true);
+ }
+
+ public HMSCachingCatalog(HiveCatalog catalog, long expirationMs, boolean
caseSensitive) {
+ super(catalog, caseSensitive, expirationMs, Ticker.systemTicker());
this.hiveCatalog = catalog;
+ if (catalog.getConf().getBoolean("metastore.iceberg.catalog.cache.debug",
false)) {
+ cacheRef = new SoftReference<>(this);
+ }
+ }
+
+ /**
+ * Callback when cache invalidates the entry for a given table identifier.
+ *
+ * @param tid the table identifier to invalidate
+ */
+ protected void onCacheInvalidate(TableIdentifier tid) {
+ cacheInvalidateCount.incrementAndGet();
+ LOG.debug("Cache invalidate {}: {}", tid, cacheInvalidateCount.get());
+ }
+
+ /**
+ * Callback when cache loads a table for a given table identifier.
+ *
+ * @param tid the table identifier
+ */
+ protected void onCacheLoad(TableIdentifier tid) {
+ cacheLoadCount.incrementAndGet();
+ LOG.debug("Cache load {}: {}", tid, cacheLoadCount.get());
+ }
+
+ /**
+ * Callback when cache hit for a given table identifier.
+ *
+ * @param tid the table identifier
+ */
+ protected void onCacheHit(TableIdentifier tid) {
+ cacheHitCount.incrementAndGet();
+ LOG.debug("Cache hit {} : {}", tid, cacheHitCount.get());
+ }
+
+ /**
+ * Callback when cache miss occurs for a given table identifier.
+ *
+ * @param tid the table identifier
+ */
+ protected void onCacheMiss(TableIdentifier tid) {
+ cacheMissCount.incrementAndGet();
+ LOG.debug("Cache miss {}: {}", tid, cacheMissCount.get());
+ }
+
+ /**
+ * Callback when cache loads a metadata table for a given table identifier.
+ *
+ * @param tid the table identifier
+ */
+ protected void onCacheMetaLoad(TableIdentifier tid) {
+ cacheMetaLoadCount.incrementAndGet();
+ LOG.debug("Cache meta-load {}: {}", tid, cacheMetaLoadCount.get());
+ }
+
+ // Getter methods for accessing metrics
+ public long getCacheHitCount() {
+ return cacheHitCount.get();
}
+ public long getCacheMissCount() {
+ return cacheMissCount.get();
+ }
+
+ public long getCacheLoadCount() {
+ return cacheLoadCount.get();
+ }
+
+ public long getCacheInvalidateCount() {
+ return cacheInvalidateCount.get();
+ }
+
+ public long getCacheMetaLoadCount() {
+ return cacheMetaLoadCount.get();
+ }
+
+ public double getCacheHitRate() {
+ long hits = cacheHitCount.get();
+ long total = hits + cacheMissCount.get();
+ return total == 0 ? 0.0 : (double) hits / total;
+ }
+
+ /**
+ * Generates a map of this cache's performance metrics, including hit count,
+ * miss count, load count, invalidate count, meta-load count, and hit rate.
+ * This can be used for monitoring and debugging purposes to understand the
effectiveness of the cache.
+ * @return a map of cache performance metrics
+ */
+ public Map<String, Number> cacheStats() {
+ return Map.of(
+ "hit", getCacheHitCount(),
+ "miss", getCacheMissCount(),
+ "load", getCacheLoadCount(),
+ "invalidate", getCacheInvalidateCount(),
+ "metaload", getCacheMetaLoadCount(),
+ "hit-rate", getCacheHitRate()
+ );
+ }
+
+
@Override
- public Catalog.TableBuilder buildTable(TableIdentifier identifier, Schema
schema) {
- return hiveCatalog.buildTable(identifier, schema);
+ public void createNamespace(Namespace namespace, Map<String, String> map) {
+ hiveCatalog.createNamespace(namespace, map);
}
@Override
- public void createNamespace(Namespace nmspc, Map<String, String> map) {
- hiveCatalog.createNamespace(nmspc, map);
+ public List<Namespace> listNamespaces(Namespace namespace) throws
NoSuchNamespaceException {
+ return hiveCatalog.listNamespaces(namespace);
}
@Override
- public List<Namespace> listNamespaces(Namespace nmspc) throws
NoSuchNamespaceException {
- return hiveCatalog.listNamespaces(nmspc);
+ public Table loadTable(final TableIdentifier identifier) {
+ final TableIdentifier canonicalized = identifier.toLowerCase();
+ final Table cachedTable = tableCache.getIfPresent(canonicalized);
+ if (cachedTable != null) {
+ final String location = new
MetadataLocator(hiveCatalog).getLocation(canonicalized);
+ if (location == null) {
+ LOG.debug("Table {} has no location, returning cached table without
location", canonicalized);
+ } else {
+ String cachedLocation = cachedTable instanceof HasTableOperations
tableOps
+ ? tableOps.operations().current().metadataFileLocation()
+ : null;
+ if (!location.equals(cachedLocation)) {
+ LOG.debug("Invalidate table {}, cached {} != actual {}",
canonicalized, cachedLocation, location);
+ // Invalidate the cached table if the location is different
+ invalidateTable(canonicalized);
+ onCacheInvalidate(canonicalized);
+ } else {
+ onCacheHit(canonicalized);
+ return cachedTable;
+ }
+ }
+ } else {
+ onCacheMiss(canonicalized);
+ }
+ final Table table = tableCache.get(canonicalized,
this::loadTableWithoutCache);
+ if (table instanceof BaseMetadataTable) {
+ // Cache underlying table
+ TableIdentifier originTableIdentifier =
+ TableIdentifier.of(canonicalized.namespace().levels());
+ Table originTable = tableCache.get(originTableIdentifier,
this::loadTableWithoutCache);
+ // Share TableOperations instance of origin table for all metadata
tables, so that metadata
+ // table instances are refreshed as well when origin table instance is
refreshed.
+ if (originTable instanceof HasTableOperations tableOps) {
+ TableOperations ops = tableOps.operations();
+ MetadataTableType type = MetadataTableType.from(canonicalized.name());
+ Table metadataTable =
+ MetadataTableUtils.createMetadataTableInstance(
+ ops, hiveCatalog.name(), originTableIdentifier,
canonicalized, type);
+ tableCache.put(canonicalized, metadataTable);
+ onCacheMetaLoad(canonicalized);
+ LOG.debug("Loaded metadata table: {} for origin table: {}",
canonicalized, originTableIdentifier);
+ // Return the metadata table instead of the original table
+ return metadataTable;
+ }
+ }
+ onCacheLoad(canonicalized);
+ return table;
+ }
+
+ private Table loadTableWithoutCache(TableIdentifier identifier) {
+ try {
+ return hiveCatalog.loadTable(identifier);
+ } catch (NoSuchTableException exception) {
+ return null;
Review Comment:
Is there some reason for returning `null` when the table doesn't exist,
rather than propagating the exception? Does the cache take care of `null`s?
##########
standalone-metastore/metastore-rest-catalog/src/main/java/org/apache/iceberg/rest/HMSCachingCatalog.java:
##########
@@ -20,78 +20,276 @@
package org.apache.iceberg.rest;
import com.github.benmanes.caffeine.cache.Ticker;
+
+import java.lang.ref.SoftReference;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.Function;
+
+import org.apache.iceberg.BaseMetadataTable;
import org.apache.iceberg.CachingCatalog;
+import org.apache.iceberg.HasTableOperations;
+import org.apache.iceberg.MetadataTableType;
+import org.apache.iceberg.MetadataTableUtils;
import org.apache.iceberg.Schema;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.TableOperations;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.SupportsNamespaces;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.catalog.ViewCatalog;
import org.apache.iceberg.exceptions.NamespaceNotEmptyException;
import org.apache.iceberg.exceptions.NoSuchNamespaceException;
+import org.apache.iceberg.exceptions.NoSuchTableException;
import org.apache.iceberg.hive.HiveCatalog;
+import org.apache.iceberg.hive.MetadataLocator;
import org.apache.iceberg.view.View;
import org.apache.iceberg.view.ViewBuilder;
-
+import org.jetbrains.annotations.TestOnly;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Class that wraps an Iceberg Catalog to cache tables.
*/
public class HMSCachingCatalog extends CachingCatalog implements
SupportsNamespaces, ViewCatalog {
+ protected static final Logger LOG =
LoggerFactory.getLogger(HMSCachingCatalog.class);
+
+ private static SoftReference<HMSCachingCatalog> cacheRef = new
SoftReference<>(null);
+ @TestOnly
+ public static <C extends Catalog> C
getLatestCache(Function<HMSCachingCatalog, C> extractor) {
+ HMSCachingCatalog cache = cacheRef.get();
+ if (cache == null) {
+ return null;
+ }
+ return extractor == null ? (C) cache : extractor.apply(cache);
+ }
+
+ @TestOnly
+ public HiveCatalog getCatalog() {
+ return hiveCatalog;
+ }
+
private final HiveCatalog hiveCatalog;
-
- public HMSCachingCatalog(HiveCatalog catalog, long expiration) {
- super(catalog, true, expiration, Ticker.systemTicker());
+ // Metrics counters
+ private final AtomicLong cacheHitCount = new AtomicLong(0);
+ private final AtomicLong cacheMissCount = new AtomicLong(0);
+ private final AtomicLong cacheLoadCount = new AtomicLong(0);
+ private final AtomicLong cacheInvalidateCount = new AtomicLong(0);
+ private final AtomicLong cacheMetaLoadCount = new AtomicLong(0);
+
+ public HMSCachingCatalog(HiveCatalog catalog, long expirationMs) {
+ this(catalog, expirationMs, /*caseSensitive*/ true);
+ }
+
+ public HMSCachingCatalog(HiveCatalog catalog, long expirationMs, boolean
caseSensitive) {
+ super(catalog, caseSensitive, expirationMs, Ticker.systemTicker());
this.hiveCatalog = catalog;
+ if (catalog.getConf().getBoolean("metastore.iceberg.catalog.cache.debug",
false)) {
+ cacheRef = new SoftReference<>(this);
+ }
+ }
+
+ /**
+ * Callback when cache invalidates the entry for a given table identifier.
+ *
+ * @param tid the table identifier to invalidate
+ */
+ protected void onCacheInvalidate(TableIdentifier tid) {
+ cacheInvalidateCount.incrementAndGet();
+ LOG.debug("Cache invalidate {}: {}", tid, cacheInvalidateCount.get());
+ }
+
+ /**
+ * Callback when cache loads a table for a given table identifier.
+ *
+ * @param tid the table identifier
+ */
+ protected void onCacheLoad(TableIdentifier tid) {
+ cacheLoadCount.incrementAndGet();
+ LOG.debug("Cache load {}: {}", tid, cacheLoadCount.get());
Review Comment:
Shouldn't this and the other callbacks be like this?
```
protected void onCacheLoad(TableIdentifier tid) {
long count = cacheLoadCount.incrementAndGet();
LOG.debug("Cache load {}: {}", tid, count);
}
```
In case of concurrent executions, if we do `cacheLoadCount.get()`, it might
return another value rather than the intended one.
##########
standalone-metastore/metastore-rest-catalog/src/main/java/org/apache/iceberg/rest/HMSCachingCatalog.java:
##########
@@ -20,78 +20,276 @@
package org.apache.iceberg.rest;
import com.github.benmanes.caffeine.cache.Ticker;
+
+import java.lang.ref.SoftReference;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.Function;
+
+import org.apache.iceberg.BaseMetadataTable;
import org.apache.iceberg.CachingCatalog;
+import org.apache.iceberg.HasTableOperations;
+import org.apache.iceberg.MetadataTableType;
+import org.apache.iceberg.MetadataTableUtils;
import org.apache.iceberg.Schema;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.TableOperations;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.SupportsNamespaces;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.catalog.ViewCatalog;
import org.apache.iceberg.exceptions.NamespaceNotEmptyException;
import org.apache.iceberg.exceptions.NoSuchNamespaceException;
+import org.apache.iceberg.exceptions.NoSuchTableException;
import org.apache.iceberg.hive.HiveCatalog;
+import org.apache.iceberg.hive.MetadataLocator;
import org.apache.iceberg.view.View;
import org.apache.iceberg.view.ViewBuilder;
-
+import org.jetbrains.annotations.TestOnly;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Class that wraps an Iceberg Catalog to cache tables.
*/
public class HMSCachingCatalog extends CachingCatalog implements
SupportsNamespaces, ViewCatalog {
+ protected static final Logger LOG =
LoggerFactory.getLogger(HMSCachingCatalog.class);
+
+ private static SoftReference<HMSCachingCatalog> cacheRef = new
SoftReference<>(null);
+ @TestOnly
+ public static <C extends Catalog> C
getLatestCache(Function<HMSCachingCatalog, C> extractor) {
+ HMSCachingCatalog cache = cacheRef.get();
+ if (cache == null) {
+ return null;
+ }
+ return extractor == null ? (C) cache : extractor.apply(cache);
+ }
+
+ @TestOnly
+ public HiveCatalog getCatalog() {
+ return hiveCatalog;
+ }
+
private final HiveCatalog hiveCatalog;
-
- public HMSCachingCatalog(HiveCatalog catalog, long expiration) {
- super(catalog, true, expiration, Ticker.systemTicker());
+ // Metrics counters
+ private final AtomicLong cacheHitCount = new AtomicLong(0);
+ private final AtomicLong cacheMissCount = new AtomicLong(0);
+ private final AtomicLong cacheLoadCount = new AtomicLong(0);
+ private final AtomicLong cacheInvalidateCount = new AtomicLong(0);
+ private final AtomicLong cacheMetaLoadCount = new AtomicLong(0);
+
+ public HMSCachingCatalog(HiveCatalog catalog, long expirationMs) {
+ this(catalog, expirationMs, /*caseSensitive*/ true);
+ }
+
+ public HMSCachingCatalog(HiveCatalog catalog, long expirationMs, boolean
caseSensitive) {
+ super(catalog, caseSensitive, expirationMs, Ticker.systemTicker());
this.hiveCatalog = catalog;
+ if (catalog.getConf().getBoolean("metastore.iceberg.catalog.cache.debug",
false)) {
+ cacheRef = new SoftReference<>(this);
+ }
+ }
+
+ /**
+ * Callback when cache invalidates the entry for a given table identifier.
+ *
+ * @param tid the table identifier to invalidate
+ */
+ protected void onCacheInvalidate(TableIdentifier tid) {
+ cacheInvalidateCount.incrementAndGet();
+ LOG.debug("Cache invalidate {}: {}", tid, cacheInvalidateCount.get());
+ }
+
+ /**
+ * Callback when cache loads a table for a given table identifier.
+ *
+ * @param tid the table identifier
+ */
+ protected void onCacheLoad(TableIdentifier tid) {
+ cacheLoadCount.incrementAndGet();
+ LOG.debug("Cache load {}: {}", tid, cacheLoadCount.get());
+ }
+
+ /**
+ * Callback when cache hit for a given table identifier.
+ *
+ * @param tid the table identifier
+ */
+ protected void onCacheHit(TableIdentifier tid) {
+ cacheHitCount.incrementAndGet();
+ LOG.debug("Cache hit {} : {}", tid, cacheHitCount.get());
+ }
+
+ /**
+ * Callback when cache miss occurs for a given table identifier.
+ *
+ * @param tid the table identifier
+ */
+ protected void onCacheMiss(TableIdentifier tid) {
+ cacheMissCount.incrementAndGet();
+ LOG.debug("Cache miss {}: {}", tid, cacheMissCount.get());
+ }
+
+ /**
+ * Callback when cache loads a metadata table for a given table identifier.
+ *
+ * @param tid the table identifier
+ */
+ protected void onCacheMetaLoad(TableIdentifier tid) {
+ cacheMetaLoadCount.incrementAndGet();
+ LOG.debug("Cache meta-load {}: {}", tid, cacheMetaLoadCount.get());
+ }
+
+ // Getter methods for accessing metrics
+ public long getCacheHitCount() {
+ return cacheHitCount.get();
}
+ public long getCacheMissCount() {
+ return cacheMissCount.get();
+ }
+
+ public long getCacheLoadCount() {
+ return cacheLoadCount.get();
+ }
+
+ public long getCacheInvalidateCount() {
+ return cacheInvalidateCount.get();
+ }
+
+ public long getCacheMetaLoadCount() {
+ return cacheMetaLoadCount.get();
+ }
+
+ public double getCacheHitRate() {
+ long hits = cacheHitCount.get();
+ long total = hits + cacheMissCount.get();
+ return total == 0 ? 0.0 : (double) hits / total;
+ }
+
+ /**
+ * Generates a map of this cache's performance metrics, including hit count,
+ * miss count, load count, invalidate count, meta-load count, and hit rate.
+ * This can be used for monitoring and debugging purposes to understand the
effectiveness of the cache.
+ * @return a map of cache performance metrics
+ */
+ public Map<String, Number> cacheStats() {
+ return Map.of(
+ "hit", getCacheHitCount(),
+ "miss", getCacheMissCount(),
+ "load", getCacheLoadCount(),
+ "invalidate", getCacheInvalidateCount(),
+ "metaload", getCacheMetaLoadCount(),
+ "hit-rate", getCacheHitRate()
+ );
+ }
+
+
@Override
- public Catalog.TableBuilder buildTable(TableIdentifier identifier, Schema
schema) {
- return hiveCatalog.buildTable(identifier, schema);
+ public void createNamespace(Namespace namespace, Map<String, String> map) {
+ hiveCatalog.createNamespace(namespace, map);
}
@Override
- public void createNamespace(Namespace nmspc, Map<String, String> map) {
- hiveCatalog.createNamespace(nmspc, map);
+ public List<Namespace> listNamespaces(Namespace namespace) throws
NoSuchNamespaceException {
+ return hiveCatalog.listNamespaces(namespace);
}
@Override
- public List<Namespace> listNamespaces(Namespace nmspc) throws
NoSuchNamespaceException {
- return hiveCatalog.listNamespaces(nmspc);
+ public Table loadTable(final TableIdentifier identifier) {
+ final TableIdentifier canonicalized = identifier.toLowerCase();
+ final Table cachedTable = tableCache.getIfPresent(canonicalized);
+ if (cachedTable != null) {
+ final String location = new
MetadataLocator(hiveCatalog).getLocation(canonicalized);
+ if (location == null) {
+ LOG.debug("Table {} has no location, returning cached table without
location", canonicalized);
+ } else {
+ String cachedLocation = cachedTable instanceof HasTableOperations
tableOps
+ ? tableOps.operations().current().metadataFileLocation()
+ : null;
+ if (!location.equals(cachedLocation)) {
+ LOG.debug("Invalidate table {}, cached {} != actual {}",
canonicalized, cachedLocation, location);
+ // Invalidate the cached table if the location is different
+ invalidateTable(canonicalized);
+ onCacheInvalidate(canonicalized);
+ } else {
+ onCacheHit(canonicalized);
+ return cachedTable;
+ }
+ }
+ } else {
+ onCacheMiss(canonicalized);
+ }
+ final Table table = tableCache.get(canonicalized,
this::loadTableWithoutCache);
+ if (table instanceof BaseMetadataTable) {
+ // Cache underlying table
+ TableIdentifier originTableIdentifier =
+ TableIdentifier.of(canonicalized.namespace().levels());
+ Table originTable = tableCache.get(originTableIdentifier,
this::loadTableWithoutCache);
+ // Share TableOperations instance of origin table for all metadata
tables, so that metadata
+ // table instances are refreshed as well when origin table instance is
refreshed.
+ if (originTable instanceof HasTableOperations tableOps) {
+ TableOperations ops = tableOps.operations();
+ MetadataTableType type = MetadataTableType.from(canonicalized.name());
Review Comment:
If `type` is `null`, I think we will hit an NPE; can you check once?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]