[ https://issues.apache.org/jira/browse/HIVE-27186?focusedWorklogId=861111&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-861111 ]
ASF GitHub Bot logged work on HIVE-27186: ----------------------------------------- Author: ASF GitHub Bot Created on: 09/May/23 04:02 Start Date: 09/May/23 04:02 Worklog Time Spent: 10m Work Description: dengzhhu653 commented on code in PR #4194: URL: https://github.com/apache/hive/pull/4194#discussion_r1188094363 ########## standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/properties/CachingPropertyStore.java: ########## @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import static org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars.PROPERTIES_CACHE_CAPACITY; +import static org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars.PROPERTIES_CACHE_LOADFACTOR; + +import java.util.Iterator; +import java.util.Map; +import java.util.Objects; +import java.util.TreeMap; +import java.util.UUID; +import java.util.function.BiFunction; +import java.util.function.Function; +import java.util.function.Predicate; + +/** + * A property map store using a pull-thru cache. 
+ * <p> + * Before a map is returned, a check against the stored corresponding digest is performed to avoid + * using stale data. + * </p> + */ +public class CachingPropertyStore extends PropertyStore { + protected final SoftCache<String, PropertyMap> maps; + protected final PropertyStore store; + public CachingPropertyStore(PropertyStore wrap) { + this(wrap, new Configuration()); + } + + public CachingPropertyStore(PropertyStore wrap, Configuration conf) { + store = wrap; + int capacity = MetastoreConf.getIntVar(conf, MetastoreConf.ConfVars.PROPERTIES_CACHE_CAPACITY); + double fillFactor = MetastoreConf.getDoubleVar(conf, MetastoreConf.ConfVars.PROPERTIES_CACHE_LOADFACTOR); + maps = new SoftCache<>(capacity, fillFactor, false); + } + public void clearCache() { + maps.clear(); + } + + @Override public UUID fetchDigest(String mapKey) { + return store.fetchDigest(mapKey); + } + + @Override + public Map<String, UUID> selectDigest(String keyPrefix, Predicate<String> keyFilter) { + return store.selectDigest(keyPrefix, keyFilter); + } + + @Override + public PropertyMap fetchProperties(final String mapKey, final Function<String, PropertySchema> getSchema) { + synchronized(this) { + PropertyMap map = maps.compute(mapKey, mapsCompute(mapKey, getSchema)); + // we always return a copy of the properties in the cache + return map != null? 
map.copy() : null; + } + } + + BiFunction<String, PropertyMap, PropertyMap> mapsCompute(String string, Function<String, PropertySchema> getSchema) { + return (k, v) -> { + PropertyMap map = v; + if (map != null) { + UUID digest = map.getDigest(); + UUID fetchedDigest = fetchDigest(string); + if (fetchedDigest != null && !Objects.equals(digest, fetchedDigest)) { + map = null; + } + } + if (map == null) { + map = store.fetchProperties(string, getSchema); + } + return map; + }; + } + + @Override + public Map<String, PropertyMap> selectProperties(final String keyPrefix, Predicate<String> keyFilter, Function<String, PropertySchema> getSchema) { + final Map<String, PropertyMap> results = new TreeMap<>(); + // go select the digests for the maps we seek + final Map<String, UUID> digests = store.selectDigest(keyPrefix, keyFilter); + final Iterator<Map.Entry<String, UUID>> idigest = digests.entrySet().iterator(); + while (idigest.hasNext()) { + Map.Entry<String, UUID> entry = idigest.next(); + String key = entry.getKey(); + PropertyMap map = maps.get(key); + // remove from maps to select and add to results if in the cache and digest is valid + if (map != null && Objects.equals(map.getDigest(), entry.getValue())) { + results.put(key, map.copy()); + idigest.remove(); + } + } + // digests now contains the names of maps required that are not results + Map<String, PropertyMap> selectedMaps = store.selectProperties(keyPrefix, digests::containsKey, getSchema); + // we cache those new maps and for each add the copy to the result if we have not loaded and cached it concurrently + selectedMaps.forEach((k, v) -> { + PropertyMap m = maps.putIfAbsent(k, v); + results.put(k, m != null && m.isDirty()? 
m : v.copy()); + }); + return results; + } + + @Override + public void saveProperties(String mapKey, PropertyMap map) { + synchronized(this) { + store.saveProperties(mapKey, map); + maps.put(mapKey, map); + } + } + + @Override + protected boolean dropProperties(String mapKey) { + synchronized(this) { + boolean b = store.dropProperties(mapKey); + maps.clear(); + return b; + } + } + + @Override + public boolean renameProperties(String mapKey, String newKey) { + synchronized (this) { + // target is unencumbered + if (!maps.containsKey(newKey)) { + PropertyMap map = maps.remove(mapKey); + // we got a source + if (map != null) { + maps.put(newKey, map); + return true; + } Review Comment: nit: Should we also rename the properties stored in db? Issue Time Tracking ------------------- Worklog Id: (was: 861111) Time Spent: 18.5h (was: 18h 20m) > A persistent property store > ---------------------------- > > Key: HIVE-27186 > URL: https://issues.apache.org/jira/browse/HIVE-27186 > Project: Hive > Issue Type: Improvement > Components: Metastore > Affects Versions: 4.0.0-alpha-2 > Reporter: Henri Biestro > Assignee: Henri Biestro > Priority: Major > Labels: pull-request-available > Time Spent: 18.5h > Remaining Estimate: 0h > > WHAT > A persistent property store usable as a support facility for any metadata > augmentation feature. > WHY > When adding new meta-data oriented features, we usually need to persist > information linking the feature data and the HiveMetaStore objects it applies > to. Any information related to a database, a table or the cluster - like > statistics for example or any operational data state or data (think rolling > backup) - fall in this use-case. > Typically, accommodating such a feature requires modifying the Metastore > database schema by adding or altering a table. It also usually implies > modifying the thrift APIs to expose such meta-data to consumers. 
> The proposed feature aims to solve persistence and query/transport for > these types of use-cases by providing a 'key/(meta)value' store exposed as a > property system. > HOW > The exposed API is a simple and generic property-value model. > To accommodate several usage scenarios, the model entry point is a > 'namespace' that qualifies the feature-component property manager. For > example, 'stats' could be the namespace for all properties related to the > 'statistics' feature. > The namespace identifies a manager that handles property-groups persisted as > property-maps. For instance, all statistics pertaining to a given table would > be collocated in the same property-group. As such, all properties (say, the number > of 'unique_values' per column) for a given HMS table 'relation0' would all > be stored and persisted in the same property-map instance. > Property-maps may be decorated by an (optional) schema that may declare the > name and value-type of allowed properties (and their optional default value). > Each property is addressed by a name, a path uniquely identifying the > property in a given property map. > The manager also handles transforming property-map names to the property-map > keys used to persist them in the DB. > The API supports inserting/updating properties in bulk, transactionally. It > also provides selection/projection to help reduce the volume of exchange > between client/server; selection can use (JEXL expression) predicates to > filter maps. -- This message was sent by Atlassian Jira (v8.20.10#820010)