madrob commented on a change in pull request #238:
URL: https://github.com/apache/solr/pull/238#discussion_r682950270



##########
File path: 
solr/core/src/java/org/apache/solr/handler/admin/HealthCheckHandler.java
##########
@@ -61,6 +64,17 @@
  *   is fully initialized and stable before proceeding with restarting the 
next node, and thus
  *   reduce the risk of restarting the last live replica of a shard.
  * </p>
+ *
+ * <p>
+ *     For the legacy mode the handler returns status <code>200 OK</code> if 
all the cores configured as follower have
+ *     successfully replicated index from their respective leader after 
startup. Note that this is a weak check
+ *     i.e. once a follower has caught up with the leader the health check 
will keep reporting <code>200 OK</code>
+ *     even if the follower starts lagging behind. You should specify the 
acceptable generation lag lag follower should

Review comment:
       s/lag lag/lag/

##########
File path: 
solr/core/src/java/org/apache/solr/handler/admin/HealthCheckHandler.java
##########
@@ -18,18 +18,21 @@
 package org.apache.solr.handler.admin;
 
 import java.lang.invoke.MethodHandles;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
+import java.util.*;

Review comment:
       please avoid adding new wildcard imports.

##########
File path: 
solr/core/src/java/org/apache/solr/handler/admin/HealthCheckHandler.java
##########
@@ -135,6 +153,80 @@ public void handleRequestBody(SolrQueryRequest req, 
SolrQueryResponse rsp) throw
     rsp.add(STATUS, OK);
   }
 
+  private void healthCheckLegacyMode(SolrQueryRequest req, SolrQueryResponse 
rsp) {
+    Integer maxGenerationLag = 
req.getParams().getInt(HealthCheckRequest.PARAM_MAX_GENERATION_LAG);
+    List<String> laggingCoresInfo = new ArrayList<>();
+    boolean allCoresAreInSync = true;
+
+    // check only if max generation lag is specified
+    if(maxGenerationLag != null) {
+      for(SolrCore core : coreContainer.getCores()) {
+        ReplicationHandler replicationHandler =
+          (ReplicationHandler) core.getRequestHandler(ReplicationHandler.PATH);
+        // if maxGeneration lag is not specified don't check if follower is in 
sync.

Review comment:
       I don't understand this comment, we are in the middle of a "max 
generation specified" block.

##########
File path: solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
##########
@@ -233,6 +233,10 @@ public void setPollListener(PollListener pollListener) {
     this.pollListener = pollListener;
   }
 
+  public boolean isFollower() {

Review comment:
       Does this method make more sense on the SolrCore rather than the 
ReplicationHandler?

##########
File path: 
solr/core/src/java/org/apache/solr/handler/admin/HealthCheckHandler.java
##########
@@ -135,6 +153,80 @@ public void handleRequestBody(SolrQueryRequest req, 
SolrQueryResponse rsp) throw
     rsp.add(STATUS, OK);
   }
 
+  private void healthCheckLegacyMode(SolrQueryRequest req, SolrQueryResponse 
rsp) {
+    Integer maxGenerationLag = 
req.getParams().getInt(HealthCheckRequest.PARAM_MAX_GENERATION_LAG);
+    List<String> laggingCoresInfo = new ArrayList<>();
+    boolean allCoresAreInSync = true;
+
+    // check only if max generation lag is specified
+    if(maxGenerationLag != null) {
+      for(SolrCore core : coreContainer.getCores()) {
+        ReplicationHandler replicationHandler =
+          (ReplicationHandler) core.getRequestHandler(ReplicationHandler.PATH);
+        // if maxGeneration lag is not specified don't check if follower is in 
sync.
+        if(replicationHandler.isFollower()) {
+          boolean isCoreInSync =
+            generationLagFromLeader(core, replicationHandler, 
maxGenerationLag, laggingCoresInfo);
+
+          allCoresAreInSync &= isCoreInSync;
+        }
+      }
+    }
+
+    if(allCoresAreInSync) {
+      rsp.add("message",
+        String.format(Locale.ROOT, "All the followers are in sync with leader 
(within maxGenerationLag: %d) " +
+          "or all the cores are acting as leader", maxGenerationLag));
+      rsp.add(STATUS, OK);
+    } else {
+      rsp.add("message",
+        String.format(Locale.ROOT,"Cores violating maxGenerationLag:%d.\n%s", 
maxGenerationLag,
+          String.join(",\n", laggingCoresInfo)));
+      rsp.add(STATUS, FAILURE);
+    }
+  }
+
+  private boolean generationLagFromLeader(final SolrCore core, 
ReplicationHandler replicationHandler,
+                                          int maxGenerationLag, List<String> 
laggingCoresInfo) {
+    IndexFetcher indexFetcher = null;
+    try {
+      // may not be the best way to get leader's replicableCommit
+      @SuppressWarnings({"rawtypes"})
+      NamedList follower =
+        
ReplicationHandler.getObjectWithBackwardCompatibility(replicationHandler.getInitArgs(),
 "follower",
+                  "slave");
+      indexFetcher = new IndexFetcher(follower, replicationHandler, core);
+
+      @SuppressWarnings({"rawtypes"})
+      NamedList replicableCommitOnLeader = indexFetcher.getLatestVersion();
+      long leaderGeneration = (Long) replicableCommitOnLeader.get(GENERATION);
+
+      // Get our own commit and generation from the commit
+      IndexCommit commit = core.getDeletionPolicy().getLatestCommit();
+      if(commit != null) {
+        long followerGeneration = commit.getGeneration();
+        long generationDiff = leaderGeneration - followerGeneration;
+
+        // generationDiff should be within the threshold and generationDiff 
shouldn't be negative
+        if(generationDiff < maxGenerationLag && generationDiff >= 0) {
+          log.info("For core:[{}] generation lag is above acceptable 
threshold:[{}], " +
+                    "generation lag:[{}], leader generation:[{}], follower 
generation:[{}]",
+                    core, maxGenerationLag, generationDiff, leaderGeneration, 
followerGeneration);
+
+          laggingCoresInfo.add(String.format(Locale.ROOT, "Core %s is lagging 
by %d generations", core.getName(), generationDiff));
+          return true;
+        }
+      }
+    } catch (Exception e) {
+      log.error("Failed to check if the follower is in sync with the leader");

Review comment:
       log the exception as well?

##########
File path: 
solr/core/src/java/org/apache/solr/handler/admin/HealthCheckHandler.java
##########
@@ -135,6 +153,80 @@ public void handleRequestBody(SolrQueryRequest req, 
SolrQueryResponse rsp) throw
     rsp.add(STATUS, OK);
   }
 
+  private void healthCheckLegacyMode(SolrQueryRequest req, SolrQueryResponse 
rsp) {
+    Integer maxGenerationLag = 
req.getParams().getInt(HealthCheckRequest.PARAM_MAX_GENERATION_LAG);
+    List<String> laggingCoresInfo = new ArrayList<>();
+    boolean allCoresAreInSync = true;
+
+    // check only if max generation lag is specified
+    if(maxGenerationLag != null) {
+      for(SolrCore core : coreContainer.getCores()) {
+        ReplicationHandler replicationHandler =
+          (ReplicationHandler) core.getRequestHandler(ReplicationHandler.PATH);
+        // if maxGeneration lag is not specified don't check if follower is in 
sync.
+        if(replicationHandler.isFollower()) {
+          boolean isCoreInSync =
+            generationLagFromLeader(core, replicationHandler, 
maxGenerationLag, laggingCoresInfo);
+
+          allCoresAreInSync &= isCoreInSync;
+        }
+      }
+    }
+
+    if(allCoresAreInSync) {
+      rsp.add("message",
+        String.format(Locale.ROOT, "All the followers are in sync with leader 
(within maxGenerationLag: %d) " +
+          "or all the cores are acting as leader", maxGenerationLag));
+      rsp.add(STATUS, OK);
+    } else {
+      rsp.add("message",
+        String.format(Locale.ROOT,"Cores violating maxGenerationLag:%d.\n%s", 
maxGenerationLag,
+          String.join(",\n", laggingCoresInfo)));
+      rsp.add(STATUS, FAILURE);
+    }
+  }
+
+  private boolean generationLagFromLeader(final SolrCore core, 
ReplicationHandler replicationHandler,

Review comment:
       This name is confusing, maybe something like `isWithinGenerationLag`?

##########
File path: 
solr/core/src/java/org/apache/solr/handler/admin/HealthCheckHandler.java
##########
@@ -135,6 +153,80 @@ public void handleRequestBody(SolrQueryRequest req, 
SolrQueryResponse rsp) throw
     rsp.add(STATUS, OK);
   }
 
+  private void healthCheckLegacyMode(SolrQueryRequest req, SolrQueryResponse 
rsp) {
+    Integer maxGenerationLag = 
req.getParams().getInt(HealthCheckRequest.PARAM_MAX_GENERATION_LAG);
+    List<String> laggingCoresInfo = new ArrayList<>();
+    boolean allCoresAreInSync = true;
+
+    // check only if max generation lag is specified
+    if(maxGenerationLag != null) {

Review comment:
       Do we need to check for `> 0`?

##########
File path: solr/core/src/test/org/apache/solr/handler/ReplicationTestHelper.java
##########
@@ -0,0 +1,317 @@
+package org.apache.solr.handler;
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.embedded.JettyConfig;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.util.FileUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.*;
+import java.lang.invoke.MethodHandles;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Properties;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+public final class ReplicationTestHelper {
+
+    private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+    public static final String CONF_DIR = "solr"
+            + File.separator + "collection1" + File.separator + "conf"
+            + File.separator;
+
+
+    public static JettySolrRunner createAndStartJetty(SolrInstance instance) 
throws Exception {
+        FileUtils.copyFile(new File(SolrTestCaseJ4.TEST_HOME(), "solr.xml"), 
new File(instance.getHomeDir(), "solr.xml"));
+        Properties nodeProperties = new Properties();
+        nodeProperties.setProperty("solr.data.dir", instance.getDataDir());
+        JettyConfig jettyConfig = 
JettyConfig.builder().setContext("/solr").setPort(0).build();
+        JettySolrRunner jetty = new JettySolrRunner(instance.getHomeDir(), 
nodeProperties, jettyConfig);
+        jetty.start();
+        return jetty;
+    }
+
+    public static HttpSolrClient createNewSolrClient(String baseUrl) {
+        try {
+            // setup the client...
+            HttpSolrClient client = SolrTestCaseJ4.getHttpSolrClient(baseUrl, 
15000, 90000);
+            return client;
+        }
+        catch (Exception ex) {
+            throw new RuntimeException(ex);
+        }
+    }
+
+    public static int index(SolrClient s, Object... fields) throws Exception {
+        SolrInputDocument doc = new SolrInputDocument();
+        for (int i = 0; i < fields.length; i += 2) {
+            doc.addField((String) (fields[i]), fields[i + 1]);
+        }
+        return s.add(doc).getStatus();
+    }
+
+    /**
+     * character copy of file using UTF-8. If port is non-null, will be 
substituted any time "TEST_PORT" is found.
+     */
+    private static void copyFile(File src, File dst, Integer port, boolean 
internalCompression) throws IOException {
+        try (BufferedReader in = new BufferedReader(new InputStreamReader(new 
FileInputStream(src), StandardCharsets.UTF_8));
+             Writer out = new OutputStreamWriter(new FileOutputStream(dst), 
StandardCharsets.UTF_8)) {
+
+            for (String line = in.readLine(); null != line; line = 
in.readLine()) {
+                if (null != port) {
+                    line = line.replace("TEST_PORT", port.toString());
+                }
+                line = line.replace("COMPRESSION", internalCompression ? 
"internal" : "false");
+                out.write(line);
+            }
+        }
+    }
+
+    public static void assertVersions(SolrClient client1, SolrClient client2) 
throws Exception {
+        NamedList<Object> details = getDetails(client1);
+        @SuppressWarnings({"unchecked"})
+        ArrayList<NamedList<Object>> commits = (ArrayList<NamedList<Object>>) 
details.get("commits");
+        Long maxVersionClient1 = getVersion(client1);
+        Long maxVersionClient2 = getVersion(client2);
+
+        if (maxVersionClient1 > 0 && maxVersionClient2 > 0) {
+            assertEquals(maxVersionClient1, maxVersionClient2);
+        }
+
+        // check vs /replication?command=indexversion call
+        ModifiableSolrParams params = new ModifiableSolrParams();
+        params.set("qt", ReplicationHandler.PATH);
+        params.set("_trace", "assertVersions");
+        params.set("command", "indexversion");
+        QueryRequest req = new QueryRequest(params);
+        NamedList<Object> resp = client1.request(req);
+        assertReplicationResponseSucceeded(resp);
+        Long version = (Long) resp.get("indexversion");
+        assertEquals(maxVersionClient1, version);
+
+        // check vs /replication?command=indexversion call
+        resp = client2.request(req);
+        assertReplicationResponseSucceeded(resp);
+        version = (Long) resp.get("indexversion");
+        assertEquals(maxVersionClient2, version);
+    }
+
+    @SuppressWarnings({"unchecked"})
+    public static Long getVersion(SolrClient client) throws Exception {
+        NamedList<Object> details;
+        ArrayList<NamedList<Object>> commits;
+        details = getDetails(client);
+        commits = (ArrayList<NamedList<Object>>) details.get("commits");
+        Long maxVersionFollower= 0L;
+        for(NamedList<Object> commit : commits) {
+            Long version = (Long) commit.get("indexVersion");
+            maxVersionFollower = Math.max(version, maxVersionFollower);
+        }
+        return maxVersionFollower;
+    }
+
+    //Simple function to wrap the invocation of replication commands on the 
various
+    //jetty servers.
+    public static void invokeReplicationCommand(String baseUrl, String 
pCommand) throws IOException
+    {
+        //String leaderUrl = buildUrl(pJettyPort) + "/" + 
DEFAULT_TEST_CORENAME + ReplicationHandler.PATH+"?command=" + pCommand;
+        String url = baseUrl + ReplicationHandler.PATH+"?command=" + pCommand;
+        URL u = new URL(url);
+        InputStream stream = u.openStream();
+        stream.close();
+    }
+
+    public  static NamedList<Object> query(String query, SolrClient s) throws 
SolrServerException, IOException {
+        ModifiableSolrParams params = new ModifiableSolrParams();
+
+        params.add("q", query);
+        params.add("sort","id desc");
+
+        QueryResponse qres = s.query(params);
+        return qres.getResponse();
+    }
+
+    /** will sleep up to 30 seconds, looking for expectedDocCount */
+    public static NamedList<Object> rQuery(int expectedDocCount, String query, 
SolrClient client) throws Exception {
+        int timeSlept = 0;
+        NamedList<Object> res = query(query, client);
+        while (expectedDocCount != numFound(res)
+                && timeSlept < 30000) {
+            log.info("Waiting for {} docs", expectedDocCount);
+            timeSlept += 100;
+            Thread.sleep(100);
+            res = query(query, client);
+        }
+        if (log.isInfoEnabled()) {
+            log.info("Waited for {}ms and found {} docs", timeSlept, 
numFound(res));
+        }
+        return res;
+    }
+
+    public static long numFound(NamedList<Object> res) {
+        return ((SolrDocumentList) res.get("response")).getNumFound();
+    }
+
+    public static NamedList<Object> getDetails(SolrClient s) throws Exception {
+        ModifiableSolrParams params = new ModifiableSolrParams();
+        params.set("command","details");
+        params.set("_trace","getDetails");
+        params.set("qt",ReplicationHandler.PATH);
+        QueryRequest req = new QueryRequest(params);
+
+        NamedList<Object> res = s.request(req);
+        assertReplicationResponseSucceeded(res);
+
+        @SuppressWarnings("unchecked") NamedList<Object> details
+                = (NamedList<Object>) res.get("details");
+
+        assertNotNull("null details", details);
+
+        return details;
+    }
+
+    public static NamedList<Object> getIndexVersion(SolrClient s) throws 
Exception {
+
+        ModifiableSolrParams params = new ModifiableSolrParams();
+        params.set("command","indexversion");
+        params.set("_trace","getIndexVersion");
+        params.set("qt",ReplicationHandler.PATH);
+        QueryRequest req = new QueryRequest(params);
+
+        NamedList<Object> res = s.request(req);
+        assertReplicationResponseSucceeded(res);
+
+        return res;
+    }
+
+    public static void assertReplicationResponseSucceeded(NamedList<?> 
response) {
+        assertNotNull("null response from server", response);
+        assertNotNull("Expected replication response to have 'status' field", 
response.get("status"));
+        assertEquals("OK", response.get("status"));
+    }
+
+    public static NamedList<Object> reloadCore(SolrClient s, String core) 
throws Exception {
+
+        ModifiableSolrParams params = new ModifiableSolrParams();
+        params.set("action","reload");
+        params.set("core", core);
+        params.set("qt","/admin/cores");
+        QueryRequest req = new QueryRequest(params);
+
+        try (HttpSolrClient adminClient = adminClient(s)) {
+            NamedList<Object> res = adminClient.request(req);
+            assertNotNull("null response from server", res);
+            return res;
+        }
+    }
+
+    public static HttpSolrClient adminClient(SolrClient client) {
+        String adminUrl = 
((HttpSolrClient)client).getBaseURL().replace("/collection1", "");
+        return SolrTestCaseJ4.getHttpSolrClient(adminUrl);
+    }
+
+
+    public static void pullFromTo(String srcUrl, String destUrl) throws 
IOException {
+        URL url;
+        InputStream stream;
+        String leaderUrl = destUrl
+                + 
ReplicationHandler.PATH+"?wait=true&command=fetchindex&leaderUrl="
+                + srcUrl
+                + ReplicationHandler.PATH;
+        url = new URL(leaderUrl);
+        stream = url.openStream();
+        stream.close();
+    }
+
+    public static class SolrInstance {
+
+        final private String name;
+        private Integer testPort;
+        final private File homeDir;
+        private File confDir;
+        private File dataDir;

Review comment:
       I would prefer seeing these as Path instead of File.

##########
File path: 
solr/core/src/test/org/apache/solr/handler/TestHealthCheckHandlerLegacyMode.java
##########
@@ -0,0 +1,208 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.handler;
+
+import org.apache.lucene.util.LuceneTestCase.Slow;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.request.HealthCheckRequest;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.util.TestInjection;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+
+import static org.apache.solr.handler.ReplicationTestHelper.*;
+
+
+/**
+ * Test for HealthCheckHandler in legacy mode
+ *
+ *
+ */
+@Slow
+@SuppressSSL     // Currently unknown why SSL does not work with this test
+public class TestHealthCheckHandlerLegacyMode extends SolrTestCaseJ4 {
+    HttpSolrClient leaderClientHealthCheck, followerClientHealthCheck;
+
+    JettySolrRunner leaderJetty, followerJetty;
+    HttpSolrClient leaderClient, followerClient;
+    ReplicationTestHelper.SolrInstance leader = null, follower = null;
+
+    private static String context = "/solr";
+
+    static int nDocs = 500;
+
+    @Before
+    public void setUp() throws Exception {
+        super.setUp();
+
+        systemSetPropertySolrDisableUrlAllowList("true");
+
+        leader = new 
ReplicationTestHelper.SolrInstance(createTempDir("solr-instance").toFile(), 
"leader", null);
+        leader.setUp();
+        leaderJetty = ReplicationTestHelper.createAndStartJetty(leader);
+        leaderClient = 
ReplicationTestHelper.createNewSolrClient(buildUrl(leaderJetty.getLocalPort(), 
context) + "/" + DEFAULT_TEST_CORENAME);
+        leaderClientHealthCheck = 
ReplicationTestHelper.createNewSolrClient(buildUrl(leaderJetty.getLocalPort(), 
context));
+
+        follower = new SolrInstance(createTempDir("solr-instance").toFile(), 
"follower", leaderJetty.getLocalPort());
+        follower.setUp();
+        followerJetty = createAndStartJetty(follower);
+        followerClient = 
ReplicationTestHelper.createNewSolrClient(buildUrl(followerJetty.getLocalPort(),
 context) + "/" + DEFAULT_TEST_CORENAME);
+        followerClientHealthCheck = 
ReplicationTestHelper.createNewSolrClient(buildUrl(followerJetty.getLocalPort(),
 context));
+
+        System.setProperty("solr.indexfetcher.sotimeout2", "45000");
+    }
+
+    public void clearIndexWithReplication() throws Exception {
+        if (numFound(ReplicationTestHelper.query("*:*", leaderClient)) != 0) {
+            leaderClient.deleteByQuery("*:*");
+            leaderClient.commit();
+            // wait for replication to sync & verify
+            assertEquals(0, numFound(rQuery(0, "*:*", followerClient)));
+        }
+    }
+
+    @Override
+    @After
+    public void tearDown() throws Exception {
+        super.tearDown();
+        if (null != leaderJetty) {
+            leaderJetty.stop();
+            leaderJetty = null;
+        }
+        if (null != followerJetty) {
+            followerJetty.stop();
+            followerJetty = null;
+        }
+        if (null != leaderClient) {
+            leaderClient.close();
+            leaderClient = null;
+        }
+        if (null != followerClient) {
+            followerClient.close();
+            followerClient = null;
+        }
+        if (null != leaderClientHealthCheck) {
+            leaderClientHealthCheck.close();
+            leaderClientHealthCheck = null;
+        }
+
+        if (null != followerClientHealthCheck) {
+            followerClientHealthCheck.close();
+            followerClientHealthCheck = null;
+        }
+        System.clearProperty("solr.indexfetcher.sotimeout");
+    }
+
+
+    @Test
+    // keep this
+    public void doTestHealthCheckWithReplication() throws Exception {
+
+        TestInjection.delayBeforeFollowerCommitRefresh = random().nextInt(10);
+
+        // stop replication so that the follower doesn't pull the index
+        invokeReplicationCommand(buildUrl(followerJetty.getLocalPort(), 
context) + "/" + DEFAULT_TEST_CORENAME, "disablepoll");
+
+        nDocs--;

Review comment:
       This seems strange to be decrementing a static field?

##########
File path: 
solr/core/src/test/org/apache/solr/handler/TestHealthCheckHandlerLegacyMode.java
##########
@@ -0,0 +1,208 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.handler;
+
+import org.apache.lucene.util.LuceneTestCase.Slow;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.request.HealthCheckRequest;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.util.TestInjection;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+
+import static org.apache.solr.handler.ReplicationTestHelper.*;
+
+
+/**
+ * Test for HealthCheckHandler in legacy mode
+ *
+ *
+ */
+@Slow
+@SuppressSSL     // Currently unknown why SSL does not work with this test
+public class TestHealthCheckHandlerLegacyMode extends SolrTestCaseJ4 {
+    HttpSolrClient leaderClientHealthCheck, followerClientHealthCheck;
+
+    JettySolrRunner leaderJetty, followerJetty;
+    HttpSolrClient leaderClient, followerClient;
+    ReplicationTestHelper.SolrInstance leader = null, follower = null;
+
+    private static String context = "/solr";
+
+    static int nDocs = 500;
+
+    @Before
+    public void setUp() throws Exception {
+        super.setUp();
+
+        systemSetPropertySolrDisableUrlAllowList("true");

Review comment:
       Can we configure solr safely instead of relying on this to disable 
checks?

##########
File path: 
solr/solrj/src/java/org/apache/solr/client/solrj/request/HealthCheckRequest.java
##########
@@ -21,11 +21,17 @@
 import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.apache.solr.client.solrj.response.HealthCheckResponse;
+import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
 
+import java.util.OptionalInt;
+
 import static 
org.apache.solr.common.params.CommonParams.HEALTH_CHECK_HANDLER_PATH;
 
 public class HealthCheckRequest extends SolrRequest<HealthCheckResponse> {
+    public static final String PARAM_MAX_GENERATION_LAG = "maxGenerationLag";
+
+    private OptionalInt maxLagAllowed = OptionalInt.empty();

Review comment:
       Why are we using this over an Integer that might be null? Or something 
like -1 meaning infinite lag?

##########
File path: solr/core/src/test/org/apache/solr/handler/ReplicationTestHelper.java
##########
@@ -0,0 +1,317 @@
+package org.apache.solr.handler;
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.embedded.JettyConfig;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.util.FileUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.*;
+import java.lang.invoke.MethodHandles;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Properties;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+public final class ReplicationTestHelper {
+
+    private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+    public static final String CONF_DIR = "solr"
+            + File.separator + "collection1" + File.separator + "conf"
+            + File.separator;
+
+
+    public static JettySolrRunner createAndStartJetty(SolrInstance instance) 
throws Exception {
+        FileUtils.copyFile(new File(SolrTestCaseJ4.TEST_HOME(), "solr.xml"), 
new File(instance.getHomeDir(), "solr.xml"));
+        Properties nodeProperties = new Properties();
+        nodeProperties.setProperty("solr.data.dir", instance.getDataDir());
+        JettyConfig jettyConfig = 
JettyConfig.builder().setContext("/solr").setPort(0).build();
+        JettySolrRunner jetty = new JettySolrRunner(instance.getHomeDir(), 
nodeProperties, jettyConfig);
+        jetty.start();
+        return jetty;
+    }
+
+    public static HttpSolrClient createNewSolrClient(String baseUrl) {
+        try {
+            // setup the client...
+            HttpSolrClient client = SolrTestCaseJ4.getHttpSolrClient(baseUrl, 
15000, 90000);
+            return client;
+        }
+        catch (Exception ex) {
+            throw new RuntimeException(ex);
+        }
+    }
+
+    public static int index(SolrClient s, Object... fields) throws Exception {
+        SolrInputDocument doc = new SolrInputDocument();
+        for (int i = 0; i < fields.length; i += 2) {
+            doc.addField((String) (fields[i]), fields[i + 1]);
+        }
+        return s.add(doc).getStatus();
+    }
+
+    /**
+     * character copy of file using UTF-8. If port is non-null, will be 
substituted any time "TEST_PORT" is found.
+     */
+    private static void copyFile(File src, File dst, Integer port, boolean 
internalCompression) throws IOException {
+        try (BufferedReader in = new BufferedReader(new InputStreamReader(new 
FileInputStream(src), StandardCharsets.UTF_8));
+             Writer out = new OutputStreamWriter(new FileOutputStream(dst), 
StandardCharsets.UTF_8)) {
+
+            for (String line = in.readLine(); null != line; line = 
in.readLine()) {
+                if (null != port) {
+                    line = line.replace("TEST_PORT", port.toString());
+                }
+                line = line.replace("COMPRESSION", internalCompression ? 
"internal" : "false");
+                out.write(line);
+            }
+        }
+    }
+
+    public static void assertVersions(SolrClient client1, SolrClient client2) 
throws Exception {
+        NamedList<Object> details = getDetails(client1);
+        @SuppressWarnings({"unchecked"})
+        ArrayList<NamedList<Object>> commits = (ArrayList<NamedList<Object>>) 
details.get("commits");
+        Long maxVersionClient1 = getVersion(client1);
+        Long maxVersionClient2 = getVersion(client2);
+
+        if (maxVersionClient1 > 0 && maxVersionClient2 > 0) {
+            assertEquals(maxVersionClient1, maxVersionClient2);
+        }
+
+        // check vs /replication?command=indexversion call
+        ModifiableSolrParams params = new ModifiableSolrParams();
+        params.set("qt", ReplicationHandler.PATH);
+        params.set("_trace", "assertVersions");
+        params.set("command", "indexversion");
+        QueryRequest req = new QueryRequest(params);
+        NamedList<Object> resp = client1.request(req);
+        assertReplicationResponseSucceeded(resp);
+        Long version = (Long) resp.get("indexversion");
+        assertEquals(maxVersionClient1, version);
+
+        // check vs /replication?command=indexversion call
+        resp = client2.request(req);
+        assertReplicationResponseSucceeded(resp);
+        version = (Long) resp.get("indexversion");
+        assertEquals(maxVersionClient2, version);
+    }
+
+    @SuppressWarnings({"unchecked"})
+    public static Long getVersion(SolrClient client) throws Exception {
+        NamedList<Object> details;
+        ArrayList<NamedList<Object>> commits;
+        details = getDetails(client);
+        commits = (ArrayList<NamedList<Object>>) details.get("commits");
+        Long maxVersionFollower= 0L;
+        for(NamedList<Object> commit : commits) {
+            Long version = (Long) commit.get("indexVersion");
+            maxVersionFollower = Math.max(version, maxVersionFollower);
+        }
+        return maxVersionFollower;
+    }
+
+    //Simple function to wrap the invocation of replication commands on the 
various
+    //jetty servers.
+    public static void invokeReplicationCommand(String baseUrl, String 
pCommand) throws IOException
+    {
+        //String leaderUrl = buildUrl(pJettyPort) + "/" + 
DEFAULT_TEST_CORENAME + ReplicationHandler.PATH+"?command=" + pCommand;
+        String url = baseUrl + ReplicationHandler.PATH+"?command=" + pCommand;
+        URL u = new URL(url);
+        InputStream stream = u.openStream();
+        stream.close();
+    }
+
+    public  static NamedList<Object> query(String query, SolrClient s) throws 
SolrServerException, IOException {
+        ModifiableSolrParams params = new ModifiableSolrParams();
+
+        params.add("q", query);
+        params.add("sort","id desc");
+
+        QueryResponse qres = s.query(params);
+        return qres.getResponse();
+    }
+
+    /** will sleep up to 30 seconds, looking for expectedDocCount */
+    public static NamedList<Object> rQuery(int expectedDocCount, String query, 
SolrClient client) throws Exception {
+        int timeSlept = 0;
+        NamedList<Object> res = query(query, client);
+        while (expectedDocCount != numFound(res)
+                && timeSlept < 30000) {
+            log.info("Waiting for {} docs", expectedDocCount);
+            timeSlept += 100;
+            Thread.sleep(100);
+            res = query(query, client);
+        }
+        if (log.isInfoEnabled()) {
+            log.info("Waited for {}ms and found {} docs", timeSlept, 
numFound(res));
+        }
+        return res;
+    }
+
+    public static long numFound(NamedList<Object> res) {
+        return ((SolrDocumentList) res.get("response")).getNumFound();
+    }
+
+    public static NamedList<Object> getDetails(SolrClient s) throws Exception {
+        ModifiableSolrParams params = new ModifiableSolrParams();
+        params.set("command","details");
+        params.set("_trace","getDetails");
+        params.set("qt",ReplicationHandler.PATH);
+        QueryRequest req = new QueryRequest(params);
+
+        NamedList<Object> res = s.request(req);
+        assertReplicationResponseSucceeded(res);
+
+        @SuppressWarnings("unchecked") NamedList<Object> details
+                = (NamedList<Object>) res.get("details");
+
+        assertNotNull("null details", details);
+
+        return details;
+    }
+
+    public static NamedList<Object> getIndexVersion(SolrClient s) throws 
Exception {
+
+        ModifiableSolrParams params = new ModifiableSolrParams();
+        params.set("command","indexversion");
+        params.set("_trace","getIndexVersion");
+        params.set("qt",ReplicationHandler.PATH);
+        QueryRequest req = new QueryRequest(params);
+
+        NamedList<Object> res = s.request(req);
+        assertReplicationResponseSucceeded(res);
+
+        return res;
+    }
+
+    public static void assertReplicationResponseSucceeded(NamedList<?> 
response) {
+        assertNotNull("null response from server", response);
+        assertNotNull("Expected replication response to have 'status' field", 
response.get("status"));
+        assertEquals("OK", response.get("status"));
+    }
+
+    public static NamedList<Object> reloadCore(SolrClient s, String core) 
throws Exception {
+
+        ModifiableSolrParams params = new ModifiableSolrParams();
+        params.set("action","reload");
+        params.set("core", core);
+        params.set("qt","/admin/cores");
+        QueryRequest req = new QueryRequest(params);
+
+        try (HttpSolrClient adminClient = adminClient(s)) {
+            NamedList<Object> res = adminClient.request(req);
+            assertNotNull("null response from server", res);
+            return res;
+        }
+    }
+
+    public static HttpSolrClient adminClient(SolrClient client) {
+        String adminUrl = 
((HttpSolrClient)client).getBaseURL().replace("/collection1", "");
+        return SolrTestCaseJ4.getHttpSolrClient(adminUrl);
+    }
+
+
+    public static void pullFromTo(String srcUrl, String destUrl) throws 
IOException {
+        URL url;
+        InputStream stream;
+        String leaderUrl = destUrl
+                + 
ReplicationHandler.PATH+"?wait=true&command=fetchindex&leaderUrl="
+                + srcUrl
+                + ReplicationHandler.PATH;
+        url = new URL(leaderUrl);
+        stream = url.openStream();
+        stream.close();
+    }
+
+    public static class SolrInstance {

Review comment:
       I'm not clear what value this provides, can you elaborate a little bit?

##########
File path: 
solr/core/src/java/org/apache/solr/handler/admin/HealthCheckHandler.java
##########
@@ -135,6 +153,80 @@ public void handleRequestBody(SolrQueryRequest req, 
SolrQueryResponse rsp) throw
     rsp.add(STATUS, OK);
   }
 
+  private void healthCheckLegacyMode(SolrQueryRequest req, SolrQueryResponse 
rsp) {
+    Integer maxGenerationLag = 
req.getParams().getInt(HealthCheckRequest.PARAM_MAX_GENERATION_LAG);
+    List<String> laggingCoresInfo = new ArrayList<>();
+    boolean allCoresAreInSync = true;
+
+    // check only if max generation lag is specified
+    if(maxGenerationLag != null) {
+      for(SolrCore core : coreContainer.getCores()) {
+        ReplicationHandler replicationHandler =
+          (ReplicationHandler) core.getRequestHandler(ReplicationHandler.PATH);
+        // if maxGeneration lag is not specified don't check if follower is in 
sync.
+        if(replicationHandler.isFollower()) {
+          boolean isCoreInSync =
+            generationLagFromLeader(core, replicationHandler, 
maxGenerationLag, laggingCoresInfo);
+
+          allCoresAreInSync &= isCoreInSync;
+        }
+      }
+    }
+
+    if(allCoresAreInSync) {
+      rsp.add("message",
+        String.format(Locale.ROOT, "All the followers are in sync with leader 
(within maxGenerationLag: %d) " +
+          "or all the cores are acting as leader", maxGenerationLag));
+      rsp.add(STATUS, OK);
+    } else {
+      rsp.add("message",
+        String.format(Locale.ROOT,"Cores violating maxGenerationLag:%d.\n%s", 
maxGenerationLag,
+          String.join(",\n", laggingCoresInfo)));
+      rsp.add(STATUS, FAILURE);
+    }
+  }
+
+  private boolean generationLagFromLeader(final SolrCore core, 
ReplicationHandler replicationHandler,
+                                          int maxGenerationLag, List<String> 
laggingCoresInfo) {
+    IndexFetcher indexFetcher = null;
+    try {
+      // may not be the best way to get leader's replicableCommit
+      @SuppressWarnings({"rawtypes"})

Review comment:
       please use proper generic types in new code!

##########
File path: 
solr/core/src/java/org/apache/solr/handler/admin/HealthCheckHandler.java
##########
@@ -135,6 +153,80 @@ public void handleRequestBody(SolrQueryRequest req, 
SolrQueryResponse rsp) throw
     rsp.add(STATUS, OK);
   }
 
+  private void healthCheckLegacyMode(SolrQueryRequest req, SolrQueryResponse 
rsp) {
+    Integer maxGenerationLag = 
req.getParams().getInt(HealthCheckRequest.PARAM_MAX_GENERATION_LAG);
+    List<String> laggingCoresInfo = new ArrayList<>();
+    boolean allCoresAreInSync = true;
+
+    // check only if max generation lag is specified
+    if(maxGenerationLag != null) {
+      for(SolrCore core : coreContainer.getCores()) {
+        ReplicationHandler replicationHandler =
+          (ReplicationHandler) core.getRequestHandler(ReplicationHandler.PATH);
+        // if maxGeneration lag is not specified don't check if follower is in 
sync.
+        if(replicationHandler.isFollower()) {
+          boolean isCoreInSync =
+            generationLagFromLeader(core, replicationHandler, 
maxGenerationLag, laggingCoresInfo);
+
+          allCoresAreInSync &= isCoreInSync;
+        }
+      }
+    }
+
+    if(allCoresAreInSync) {
+      rsp.add("message",
+        String.format(Locale.ROOT, "All the followers are in sync with leader 
(within maxGenerationLag: %d) " +
+          "or all the cores are acting as leader", maxGenerationLag));
+      rsp.add(STATUS, OK);
+    } else {
+      rsp.add("message",
+        String.format(Locale.ROOT,"Cores violating maxGenerationLag:%d.\n%s", 
maxGenerationLag,
+          String.join(",\n", laggingCoresInfo)));
+      rsp.add(STATUS, FAILURE);
+    }
+  }
+
+  private boolean generationLagFromLeader(final SolrCore core, 
ReplicationHandler replicationHandler,
+                                          int maxGenerationLag, List<String> 
laggingCoresInfo) {
+    IndexFetcher indexFetcher = null;
+    try {
+      // may not be the best way to get leader's replicableCommit
+      @SuppressWarnings({"rawtypes"})
+      NamedList follower =
+        
ReplicationHandler.getObjectWithBackwardCompatibility(replicationHandler.getInitArgs(),
 "follower",
+                  "slave");
+      indexFetcher = new IndexFetcher(follower, replicationHandler, core);
+
+      @SuppressWarnings({"rawtypes"})
+      NamedList replicableCommitOnLeader = indexFetcher.getLatestVersion();
+      long leaderGeneration = (Long) replicableCommitOnLeader.get(GENERATION);
+
+      // Get our own commit and generation from the commit
+      IndexCommit commit = core.getDeletionPolicy().getLatestCommit();
+      if(commit != null) {
+        long followerGeneration = commit.getGeneration();
+        long generationDiff = leaderGeneration - followerGeneration;
+
+        // generationDiff should be within the threshold and generationDiff 
shouldn't be negative

Review comment:
       what happens if it is negative? could it. ever negative? should we throw 
an exception?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org
For additional commands, e-mail: issues-h...@solr.apache.org

Reply via email to