murblanc commented on a change in pull request #1504:
URL: https://github.com/apache/lucene-solr/pull/1504#discussion_r428455296



##########
File path: 
solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/PolicyHelper.java
##########
@@ -429,87 +463,149 @@ private void release(SessionWrapper sessionWrapper) {
      * The session can be used by others while the caller is performing 
operations
      */
     private void returnSession(SessionWrapper sessionWrapper) {
-      TimeSource timeSource = sessionWrapper.session != null ? 
sessionWrapper.session.cloudManager.getTimeSource() : TimeSource.NANO_TIME;
+      boolean present;
       synchronized (lockObj) {
         sessionWrapper.status = Status.EXECUTING;
-        if (log.isDebugEnabled()) {
-          log.debug("returnSession, curr-time {} sessionWrapper.createTime {}, 
this.sessionWrapper.createTime {} "
-              , time(timeSource, MILLISECONDS),
-              sessionWrapper.createTime,
-              this.sessionWrapper.createTime);
-        }
-        if (sessionWrapper.createTime == this.sessionWrapper.createTime) {
-          //this session was used for computing new operations and this can 
now be used for other
-          // computing
-          this.sessionWrapper = sessionWrapper;
+        present = sessionWrapperSet.contains(sessionWrapper);
 
-          //one thread who is waiting for this need to be notified.
-          lockObj.notify();
-        } else {
-          log.debug("create time NOT SAME {} ", 
SessionWrapper.DEFAULT_INSTANCE.createTime);
-          //else just ignore it
-        }
+        // wake up single thread waiting for a session return (ok if not woken 
up, wait is short)
+        // Important to wake up a single one, otherwise of multiple waiting 
threads, all but one will immediately create new sessions
+        lockObj.notify();
       }
 
+      // Logging
+      if (present) {
+        if (log.isDebugEnabled()) {
+          log.debug("returnSession {}", sessionWrapper.getCreateTime());
+        }
+      } else {
+        log.warn("returning unknown session {} ", 
sessionWrapper.getCreateTime());
+      }
     }
 
-
-    public SessionWrapper get(SolrCloudManager cloudManager) throws 
IOException, InterruptedException {
+    /**
+     * <p>Method returning an available session that can be used for {@link 
Status#COMPUTING}, either from the
+     * {@link #sessionWrapperSet} cache or by creating a new one. The status 
of the returned session is set to {@link Status#COMPUTING}.</p>
+     *
+     * Some waiting is done in two cases:
+     * <ul>
+     *   <li>A candidate session is present in {@link #sessionWrapperSet} but 
is still {@link Status#COMPUTING}, a random wait
+     *   is observed to see if the session gets freed to save a session 
creation and allow session reuse,</li>
+     *   <li>It is necessary to create a new session but there are already 
sessions in the process of being created, a
+     *   random wait is observed (if no waiting already occurred waiting for a 
session to become free) before creation
+     *   takes place, just in case one of the created sessions got used then 
{@link #returnSession(SessionWrapper)} in the meantime.</li>
+     * </ul>
+     *
+     * The random wait prevents the "thundering herd" effect when all threads 
needing a session at the same time create a new
+     * one even though some differentiated waits could have led to better 
reuse and less session creations.
+     *
+     * @param allowWait usually <code>true</code> except in tests that know 
there's no point in waiting because nothing
+     *                  will happen...
+     */
+    public SessionWrapper get(SolrCloudManager cloudManager, boolean 
allowWait) throws IOException, InterruptedException {
       TimeSource timeSource = cloudManager.getTimeSource();
+      long oldestUpdateTimeNs = 
TimeUnit.SECONDS.convert(timeSource.getTimeNs(), TimeUnit.NANOSECONDS) - 
SESSION_EXPIRY;
+      int zkVersion = 
cloudManager.getDistribStateManager().getAutoScalingConfig().getZkVersion();
+
       synchronized (lockObj) {
-        if (sessionWrapper.status == Status.NULL ||
-            sessionWrapper.zkVersion != 
cloudManager.getDistribStateManager().getAutoScalingConfig().getZkVersion() ||
-            TimeUnit.SECONDS.convert(timeSource.getTimeNs() - 
sessionWrapper.lastUpdateTime, TimeUnit.NANOSECONDS) > SESSION_EXPIRY) {
-          //no session available or the session is expired
-          return createSession(cloudManager);
-        } else {
+        SessionWrapper sw = getAvailableSession(zkVersion, oldestUpdateTimeNs);
+
+        // Best case scenario: an available session
+        if (sw != null) {
+          if (log.isDebugEnabled()) {
+            log.debug("reusing session {}", sw.getCreateTime());
+          }
+          return sw;
+        }
+
+        // Wait for a while before deciding what to do if waiting could help...
+        if ((creationsInProgress != 0 || hasCandidateSession(zkVersion, 
oldestUpdateTimeNs)) && allowWait) {

Review comment:
       Put it in purpose at the end: it's only true in tests




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to