[ 
https://issues.apache.org/jira/browse/HIVE-27186?focusedWorklogId=861111&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-861111
 ]

ASF GitHub Bot logged work on HIVE-27186:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 09/May/23 04:02
            Start Date: 09/May/23 04:02
    Worklog Time Spent: 10m 
      Work Description: dengzhhu653 commented on code in PR #4194:
URL: https://github.com/apache/hive/pull/4194#discussion_r1188094363


##########
standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/properties/CachingPropertyStore.java:
##########
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
+import static org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars.PROPERTIES_CACHE_CAPACITY;
+import static org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars.PROPERTIES_CACHE_LOADFACTOR;
+
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Objects;
+import java.util.TreeMap;
+import java.util.UUID;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+import java.util.function.Predicate;
+
+/**
+ * A property map store using a pull-thru cache.
+ * <p>
+ * Before a map is returned, a check against the stored corresponding digest is performed to avoid
+ * using stale data.
+ * </p>
+ */
+public class CachingPropertyStore extends PropertyStore {
+  protected final SoftCache<String, PropertyMap> maps;
+  protected final PropertyStore store;
+  public CachingPropertyStore(PropertyStore wrap) {
+    this(wrap, new Configuration());
+  }
+
+  public CachingPropertyStore(PropertyStore wrap, Configuration conf) {
+    store = wrap;
+    int capacity = MetastoreConf.getIntVar(conf, MetastoreConf.ConfVars.PROPERTIES_CACHE_CAPACITY);
+    double fillFactor = MetastoreConf.getDoubleVar(conf, MetastoreConf.ConfVars.PROPERTIES_CACHE_LOADFACTOR);
+    maps = new SoftCache<>(capacity, fillFactor, false);
+  }
+  public void clearCache() {
+    maps.clear();
+  }
+
+  @Override public UUID fetchDigest(String mapKey) {
+    return store.fetchDigest(mapKey);
+  }
+
+  @Override
+  public Map<String, UUID> selectDigest(String keyPrefix, Predicate<String> keyFilter) {
+    return store.selectDigest(keyPrefix, keyFilter);
+  }
+
+  @Override
+  public PropertyMap fetchProperties(final String mapKey, final Function<String, PropertySchema> getSchema) {
+    synchronized(this) {
+      PropertyMap map = maps.compute(mapKey, mapsCompute(mapKey, getSchema));
+      // we always return a copy of the properties in the cache
+      return map != null? map.copy() : null;
+    }
+  }
+
+  BiFunction<String, PropertyMap, PropertyMap> mapsCompute(String string, Function<String, PropertySchema> getSchema) {
+    return (k, v) -> {
+      PropertyMap map = v;
+      if (map != null) {
+        UUID digest = map.getDigest();
+        UUID fetchedDigest = fetchDigest(string);
+        if (fetchedDigest != null && !Objects.equals(digest, fetchedDigest)) {
+          map = null;
+        }
+      }
+      if (map == null) {
+        map = store.fetchProperties(string, getSchema);
+      }
+      return map;
+    };
+  }
+
+  @Override
+  public Map<String, PropertyMap> selectProperties(final String keyPrefix, Predicate<String> keyFilter, Function<String, PropertySchema> getSchema) {
+    final Map<String, PropertyMap> results = new TreeMap<>();
+    // go select the digests for the maps we seek
+    final Map<String, UUID> digests = store.selectDigest(keyPrefix, keyFilter);
+    final Iterator<Map.Entry<String, UUID>> idigest = digests.entrySet().iterator();
+    while (idigest.hasNext()) {
+      Map.Entry<String, UUID> entry = idigest.next();
+      String key = entry.getKey();
+      PropertyMap map = maps.get(key);
+      // remove from maps to select and add to results if in the cache and digest is valid
+      if (map != null && Objects.equals(map.getDigest(), entry.getValue())) {
+        results.put(key, map.copy());
+        idigest.remove();
+      }
+    }
+    // digests now contains the names of maps required that are not results
+    Map<String, PropertyMap> selectedMaps = store.selectProperties(keyPrefix, digests::containsKey, getSchema);
+    // we cache those new maps and for each add the copy to the result if we have not loaded and cached it concurrently
+    selectedMaps.forEach((k, v) -> {
+      PropertyMap m = maps.putIfAbsent(k, v);
+      results.put(k, m != null && m.isDirty()? m : v.copy());
+    });
+    return results;
+  }
+
+  @Override
+  public void saveProperties(String mapKey, PropertyMap map) {
+    synchronized(this) {
+      store.saveProperties(mapKey, map);
+      maps.put(mapKey, map);
+    }
+  }
+
+  @Override
+  protected boolean dropProperties(String mapKey) {
+    synchronized(this) {
+      boolean b = store.dropProperties(mapKey);
+      maps.clear();
+      return b;
+    }
+  }
+
+  @Override
+  public boolean renameProperties(String mapKey, String newKey) {
+    synchronized (this) {
+      // target is unencumbered
+      if (!maps.containsKey(newKey)) {
+        PropertyMap map = maps.remove(mapKey);
+        // we got a source
+        if (map != null) {
+          maps.put(newKey, map);
+          return true;
+        }

Review Comment:
   nit: Should we also rename the properties stored in the db?
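
For context, a minimal sketch of the variant the comment seems to ask about,
i.e. delegating the rename to the wrapped store so the persisted key moves
along with the cached one. This is illustrative only; it assumes the wrapped
PropertyStore exposes the same renameProperties(String, String) method and it
is not what the PR currently does.

    @Override
    public boolean renameProperties(String mapKey, String newKey) {
      synchronized (this) {
        // proceed only if the target key is unencumbered in the cache
        // and the underlying (persistent) store accepted the rename
        if (!maps.containsKey(newKey) && store.renameProperties(mapKey, newKey)) {
          PropertyMap map = maps.remove(mapKey);
          if (map != null) {
            // mirror the rename in the cache
            maps.put(newKey, map);
          }
          return true;
        }
        return false;
      }
    }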





Issue Time Tracking
-------------------

    Worklog Id:     (was: 861111)
    Time Spent: 18.5h  (was: 18h 20m)

> A persistent property store 
> ----------------------------
>
>                 Key: HIVE-27186
>                 URL: https://issues.apache.org/jira/browse/HIVE-27186
>             Project: Hive
>          Issue Type: Improvement
>          Components: Metastore
>    Affects Versions: 4.0.0-alpha-2
>            Reporter: Henri Biestro
>            Assignee: Henri Biestro
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 18.5h
>  Remaining Estimate: 0h
>
> WHAT
> A persistent property store usable as a support facility for any metadata 
> augmentation feature.
> WHY
> When adding new metadata-oriented features, we usually need to persist 
> information linking the feature data to the HiveMetaStore objects it applies 
> to. Any information related to a database, a table or the cluster - 
> statistics, for example, or any operational state or data (think rolling 
> backup) - falls into this use-case.
> Typically, accommodating such a feature requires modifying the Metastore 
> database schema by adding or altering a table. It also usually implies 
> modifying the Thrift APIs to expose such metadata to consumers.
> The proposed feature aims to solve the persistence and query/transport for 
> these types of use-cases by providing a 'key/(meta)value' store exposed as a 
> property system.
> HOW
> The exposed API is a simple and generic property-value model.
> To accommodate several usage scenarios, the model's entry point is a 
> 'namespace' that qualifies the feature-component property manager. For 
> example, 'stats' could be the namespace for all properties related to the 
> 'statistics' feature.
> The namespace identifies a manager that handles property-groups persisted as 
> property-maps. For instance, all statistics pertaining to a given table would 
> be collocated in the same property-group. As such, all properties (say the 
> number of 'unique_values' per column) for a given HMS table 'relation0' 
> would be stored and persisted in the same property-map instance.
> Property-maps may be decorated by an (optional) schema that may declare the 
> name and value-type of allowed properties (and their optional default value). 
> Each property is addressed by a name, a path uniquely identifying the 
> property in a given property map.
> The manager also handles transforming property-map names to the property-map 
> keys used to persist them in the DB.
> The API supports inserting/updating properties transactionally in bulk. It 
> also provides selection/projection to help reduce the volume of exchange 
> between client and server; selection can use (JEXL expression) predicates to 
> filter maps.
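
As a usage illustration (not part of the patch), below is a minimal sketch of
how the CachingPropertyStore from the diff above could front a concrete store.
The backing store, the schema lookup and the map key format are placeholders,
since the concrete implementations are not shown in this excerpt.

    import java.util.function.Function;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.metastore.properties.CachingPropertyStore;
    import org.apache.hadoop.hive.metastore.properties.PropertyMap;
    import org.apache.hadoop.hive.metastore.properties.PropertySchema;
    import org.apache.hadoop.hive.metastore.properties.PropertyStore;

    public class PropertyStoreSketch {
      // 'backing' stands for whatever concrete PropertyStore persists maps in
      // the db, and 'getSchema' resolves the optional schema for a map key;
      // both are placeholders supplied by the caller.
      static PropertyMap fetchStats(PropertyStore backing, Configuration conf,
                                    Function<String, PropertySchema> getSchema) {
        // wrap the persistent store with the pull-thru cache
        PropertyStore store = new CachingPropertyStore(backing, conf);
        // hypothetical key: a 'stats' namespace manager mapping table 'relation0'
        return store.fetchProperties("stats/relation0", getSchema);
      }
    }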



--
This message was sent by Atlassian Jira
(v8.20.10#820010)
