baohe-zhang commented on a change in pull request #28412:
URL: https://github.com/apache/spark/pull/28412#discussion_r451892343



##########
File path: core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala
##########
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.deploy.history
+
+import java.io.IOException
+import java.util.Collection
+import java.util.concurrent.ConcurrentHashMap
+import java.util.concurrent.atomic.AtomicBoolean
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.util.kvstore._
+
+/**
+ * An implementation of KVStore that accelerates event logs loading.
+ *
+ * When rebuilding the application state from event logs, HybridStore will
+ * write data to InMemoryStore at first and use a background thread to dump
+ * data to LevelDB once the app store is restored. We don't expect write
+ * operations (except for caching) after switchToLevelDB() has been called.
+ */
+
+private[history] class HybridStore extends KVStore {
+
+  private val inMemoryStore = new InMemoryStore()
+
+  private var levelDB: LevelDB = null
+
+  // Flag to indicate whether we should use inMemoryStore or levelDB
+  private val shouldUseInMemoryStore = new AtomicBoolean(true)
+
+  // Flag to indicate whether this hybrid store is closed, use this flag
+  // to avoid starting background thread after the store is closed
+  private val closed = new AtomicBoolean(false)
+
+  // A background thread that dumps data from inMemoryStore to levelDB
+  private var backgroundThread: Thread = null
+
+  // A hash map that stores all classes that have been written to inMemoryStore
+  private val klassMap = new ConcurrentHashMap[Class[_], Boolean]
+
+  override def getMetadata[T](klass: Class[T]): T = {
+    getStore().getMetadata(klass)
+  }
+
+  override def setMetadata(value: Object): Unit = {
+    getStore().setMetadata(value)
+  }
+
+  override def read[T](klass: Class[T], naturalKey: Object): T = {
+    getStore().read(klass, naturalKey)
+  }
+
+  override def write(value: Object): Unit = {
+    getStore().write(value)
+
+    if (backgroundThread == null) {
+      // New classes won't be dumped once the background thread is started
+      klassMap.putIfAbsent(value.getClass(), true)
+    }
+  }
+
+  override def delete(klass: Class[_], naturalKey: Object): Unit = {
+    if (backgroundThread != null) {
+      throw new IllegalStateException("delete() shouldn't be called after " +
+        "the hybrid store begins switching to levelDB")
+    }
+
+    getStore().delete(klass, naturalKey)
+  }
+
+  override def view[T](klass: Class[T]): KVStoreView[T] = {
+    getStore().view(klass)
+  }
+
+  override def count(klass: Class[_]): Long = {
+    getStore().count(klass)
+  }
+
+  override def count(klass: Class[_], index: String, indexedValue: Object): 
Long = {
+    getStore().count(klass, index, indexedValue)
+  }
+
+  override def close(): Unit = {
+    try {
+      closed.set(true)
+      if (backgroundThread != null && backgroundThread.isAlive()) {
+        // The background thread is still running, wait for it to finish
+        backgroundThread.join()
+      }
+    } finally {
+      inMemoryStore.close()
+      if (levelDB != null) {
+        levelDB.close()
+      }
+    }
+  }
+
+  override def removeAllByIndexValues[T](
+      klass: Class[T],
+      index: String,
+      indexValues: Collection[_]): Boolean = {
+    if (backgroundThread != null) {
+      throw new IllegalStateException("removeAllByIndexValues() shouldn't be " 
+
+        "called after the hybrid store begins switching to levelDB")
+    }
+
+    getStore().removeAllByIndexValues(klass, index, indexValues)
+  }
+
+  def setLevelDB(levelDB: LevelDB): Unit = {
+    this.levelDB = levelDB
+  }
+
+  /**
+   * This method is called when the writing is done for inMemoryStore. A
+   * background thread will be created and be started to dump data in 
inMemoryStore
+   * to levelDB. Once the dumping is completed, the underlying kvstore will be
+   * switched to levelDB.
+   */
+  def switchToLevelDB(
+      listener: HybridStore.SwitchToLevelDBListener,
+      appId: String,
+      attemptId: Option[String]): Unit = {
+    if (closed.get) {
+      return
+    }
+
+    backgroundThread = new Thread(() => {
+      try {
+        for (klass <- klassMap.keys().asScala) {
+          val it = inMemoryStore.view(klass).closeableIterator()
+          while (it.hasNext()) {
+            levelDB.write(it.next())
+          }

Review comment:
       Hi @mridulm, I adapted your code and used it for the in-memory store to 
LevelDB switch, but saw only a small improvement in switching time. I am not 
sure whether I did something wrong.
   | log size, jobs and tasks per job            | 2 jobs, 400 tasks per job | 
10 jobs, 400 tasks per job | 50 jobs, 400 tasks per job | 100 jobs, 400 tasks 
per job | 200 jobs, 400 tasks per job | 500 jobs, 400 tasks per job | 1000 
jobs, 400 tasks per job | 5 jobs, 100000 tasks per job |
   | ------------------------------------------- | ------------------------- | 
-------------------------- | -------------------------- | 
--------------------------- | --------------------------- | 
--------------------------- | ---------------------------- | 
---------------------------- |
   | original switching time                     | 1s                        | 
2s                         | 4s                         | 8s                    
      | 16s                         | 37s                         | 65s         
                 | 90s                          |
   | switching time with write(Iterator<T> iter) | 1s                        | 
1s                         | 4s                         | 7s                    
      | 13s                         | 34s                         | 58s         
                 | 84s                          |
   
   The code:
   ```
   for (klass <- klassMap.keys().asScala) {
             val it = inMemoryStore.view(klass).closeableIterator()
             levelDB.write(it)
           }
   ```
   
   ```
       public <T> void write(Iterator<T> iter) throws Exception {
       Preconditions.checkArgument(iter != null, "Non-empty values required.");
   
       List<T> values = new ArrayList<>();
       iter.forEachRemaining(values::add);
   
       // Group by class, in case there are values from different classes in 
the iterator
       // Typical usecase is for this to be a single class.
       for (Map.Entry<? extends Class<?>, ? extends List<?>> entry :
               
values.stream().collect(Collectors.groupingBy(Object::getClass)).entrySet()) {
   
         final Iterator<?> valueIter = entry.getValue().iterator();
         final Iterator<byte[]> serializedValueIter;
   
         {
           // serialize outside synchronized block
           List<byte[]> list = new ArrayList<>(entry.getValue().size());
           for (Object value : entry.getValue()) {
             list.add(serializer.serialize(value));
           }
           serializedValueIter = list.iterator();
         }
   
         final Class<?> valueClass = entry.getKey();
         final LevelDBTypeInfo ti = getTypeInfo(valueClass);
   
         // Batching updates per type
         synchronized (ti) {
           final LevelDBTypeInfo.Index naturalIndex = ti.naturalIndex();
           final Collection<LevelDBTypeInfo.Index> indices = ti.indices();
   
           try (WriteBatch batch = db().createWriteBatch()) {
             while (valueIter.hasNext()) {
               final Object value = valueIter.next();
   
               assert serializedValueIter.hasNext();
               final byte[] serializedObject = serializedValueIter.next();
   
               Object existing;
               try {
                 existing = get(naturalIndex.entityKey(null, value), 
valueClass);
               } catch (NoSuchElementException e) {
                 existing = null;
               }
   
               PrefixCache cache = new PrefixCache(value);
               byte[] naturalKey = 
naturalIndex.toKey(naturalIndex.getValue(value));
               for (LevelDBTypeInfo.Index idx : indices) {
                 byte[] prefix = cache.getPrefix(idx);
                 idx.add(batch, value, existing, serializedObject, naturalKey, 
prefix);
               }
             }
             assert !serializedValueIter.hasNext();
             db().write(batch);
           }
         }
       }
     }
   ```
   
   I think using multiple threads to write data to LevelDB might shorten the 
switching time, but it would introduce more overhead on the Spark History Server (SHS). 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to