This is an automated email from the ASF dual-hosted git repository. jdyer pushed a commit to branch branch_9x in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9x by this push: new f22a40eb59b SOLR-17569: fix flaky test TestLBHttpSolrClient (9.x) / LBHttp2SolrClientIntegrationTest (main) (#2884) f22a40eb59b is described below commit f22a40eb59b9da86af2c02aded83f18949da6392 Author: James Dyer <jd...@apache.org> AuthorDate: Tue Nov 26 09:45:35 2024 -0600 SOLR-17569: fix flaky test TestLBHttpSolrClient (9.x) / LBHttp2SolrClientIntegrationTest (main) (#2884) --- .../solr/client/solrj/impl/LBSolrClient.java | 12 ++++ .../solr/client/solrj/TestLBHttp2SolrClient.java | 71 ++++++---------------- .../src/java/org/apache/solr/util/LogListener.java | 53 +++++++++++----- 3 files changed, 69 insertions(+), 67 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java index 1128c086460..172dfd5ab04 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java @@ -20,6 +20,7 @@ package org.apache.solr.client.solrj.impl; import static org.apache.solr.common.params.CommonParams.ADMIN_PATHS; import java.io.IOException; +import java.lang.invoke.MethodHandles; import java.lang.ref.WeakReference; import java.net.ConnectException; import java.net.MalformedURLException; @@ -60,10 +61,18 @@ import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.ObjectReleaseTracker; import org.apache.solr.common.util.SolrNamedThreadFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.slf4j.MDC; public abstract class LBSolrClient extends SolrClient { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + public static final String UPDATE_LIVE_SERVER_MESSAGE = "Updated alive server list"; + + private static final String UPDATE_LIVE_SERVER_LOG = UPDATE_LIVE_SERVER_MESSAGE + ": {}"; + // defaults protected static final Set<Integer> RETRY_CODES = new HashSet<>(Arrays.asList(404, 403, 503, 500)); @@ -422,6 +431,9 @@ public abstract class LBSolrClient extends SolrClient { protected void updateAliveList() { synchronized (aliveServers) { aliveServerList = aliveServers.values().toArray(new ServerWrapper[0]); + if (log.isDebugEnabled()) { + log.debug(UPDATE_LIVE_SERVER_LOG, Arrays.toString(aliveServerList)); + } } } diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/TestLBHttp2SolrClient.java b/solr/solrj/src/test/org/apache/solr/client/solrj/TestLBHttp2SolrClient.java index 78f19b4074d..3ab44ca13d4 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/TestLBHttp2SolrClient.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/TestLBHttp2SolrClient.java @@ -31,13 +31,13 @@ import org.apache.lucene.util.IOUtils; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.client.solrj.impl.Http2SolrClient; import org.apache.solr.client.solrj.impl.LBHttp2SolrClient; +import org.apache.solr.client.solrj.impl.LBSolrClient; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.SolrResponseBase; import org.apache.solr.common.SolrInputDocument; -import org.apache.solr.common.util.TimeSource; import org.apache.solr.embedded.JettyConfig; import org.apache.solr.embedded.JettySolrRunner; -import org.apache.solr.util.TimeOut; +import org.apache.solr.util.LogListener; import org.junit.AfterClass; import org.junit.BeforeClass; import org.slf4j.Logger; @@ -153,10 +153,8 @@ public class TestLBHttp2SolrClient extends SolrTestCaseJ4 { assertEquals(2, names.size()); assertFalse(names.contains("solr1")); - // Start the killed server once again - solr[1].startJetty(); - // Wait for the alive check to complete - Thread.sleep(1200); + startJettyAndWaitForAliveCheckQuery(solr[1]); + names.clear(); for (String ignored : solrUrls) { resp = client.query(solrQuery); @@ -190,63 +188,32 @@ public class TestLBHttp2SolrClient extends SolrTestCaseJ4 { resp = client.query(solrQuery); name = resp.getResults().get(0).getFieldValue("name").toString(); assertEquals("solr/collection11", name); + solr[1].jetty.stop(); solr[1].jetty = null; - solr[0].startJetty(); - Thread.sleep(1200); - try { - resp = client.query(solrQuery); - } catch (SolrServerException e) { - // try again after a pause in case the error is lack of time to start server - Thread.sleep(3000); - resp = client.query(solrQuery); - } + startJettyAndWaitForAliveCheckQuery(solr[0]); + + resp = client.query(solrQuery); name = resp.getResults().get(0).getFieldValue("name").toString(); assertEquals("solr/collection10", name); } } - public void testReliability() throws Exception { - String[] solrUrls = new String[solr.length]; - for (int i = 0; i < solr.length; i++) { - solrUrls[i] = solr[i].getUrl(); - } - - try (LBHttp2SolrClient client = - new LBHttp2SolrClient.Builder(httpClient, solrUrls) - .setAliveCheckInterval(500, TimeUnit.MILLISECONDS) - .build()) { - - // Kill a server and test again - solr[1].jetty.stop(); - solr[1].jetty = null; - - // query the servers - for (String ignored : solrUrls) client.query(new SolrQuery("*:*")); - - // Start the killed server once again - solr[1].startJetty(); - // Wait for the alive check to complete - waitForServer(30, client, 3, solr[1].name); + private LBSolrClient.Endpoint[] bootstrapBaseSolrEndpoints(int max) { + LBSolrClient.Endpoint[] solrUrls = new LBSolrClient.Endpoint[max]; + for (int i = 0; i < max; i++) { + solrUrls[i] = new LBSolrClient.Endpoint(solr[i].getBaseUrl()); } + return solrUrls; } - // wait maximum ms for serverName to come back up - private void waitForServer( - int maxSeconds, LBHttp2SolrClient client, int nServers, String serverName) throws Exception { - final TimeOut timeout = new TimeOut(maxSeconds, TimeUnit.SECONDS, TimeSource.NANO_TIME); - while (!timeout.hasTimedOut()) { - QueryResponse resp; - try { - resp = client.query(new SolrQuery("*:*")); - } catch (Exception e) { - log.warn("", e); - continue; + private void startJettyAndWaitForAliveCheckQuery(SolrInstance solrInstance) throws Exception { + try (LogListener logListener = + LogListener.debug().substring(LBSolrClient.UPDATE_LIVE_SERVER_MESSAGE)) { + solrInstance.startJetty(); + if (logListener.pollMessage(10, TimeUnit.SECONDS) == null) { + fail("The alive check query was not logged within 10 seconds."); } - String name = resp.getResults().get(0).getFieldValue("name").toString(); - if (name.equals(serverName)) return; - - Thread.sleep(500); } } diff --git a/solr/test-framework/src/java/org/apache/solr/util/LogListener.java b/solr/test-framework/src/java/org/apache/solr/util/LogListener.java index fcbb453b2b8..820ee30371b 100644 --- a/solr/test-framework/src/java/org/apache/solr/util/LogListener.java +++ b/solr/test-framework/src/java/org/apache/solr/util/LogListener.java @@ -18,11 +18,13 @@ package org.apache.solr.util; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; import java.io.Closeable; import java.lang.invoke.MethodHandles; -import java.util.Queue; import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; @@ -44,9 +46,9 @@ import org.apache.logging.log4j.message.Message; import org.apache.solr.common.util.SuppressForbidden; /** - * Helper code to listen for {@link LogEvent} messages (via a {@link Queue}) that you expect as a - * result of the things you are testing, So you can make assertions about when a particular action - * should/shouldn't cause Solr to produce a particular Log message + * Helper code to listen for {@link LogEvent} messages (via a {@link BlockingQueue}) that you expect + * as a result of the things you are testing, So you can make assertions about when a particular + * action should/shouldn't cause Solr to produce a particular Log message * * <p><code> * // simplest possible usage... @@ -278,7 +280,7 @@ public final class LogListener implements Closeable, AutoCloseable { * * @see #getQueue */ - public LogListener setQueue(Queue<LogEvent> queue) { + public LogListener setQueue(BlockingQueue<LogEvent> queue) { loggerAppender.setQueue(queue); return this; } @@ -346,21 +348,22 @@ public final class LogListener implements Closeable, AutoCloseable { } /** - * Direct access to the Queue of Log events that have been recorded, for {@link Queue#poll}ing - * messages or any other inspection/manipulation. + * Direct access to the Queue of Log events that have been recorded, for {@link + * BlockingQueue#poll}ing messages or any other inspection/manipulation. * * <p>If a Log event is ever processed but can not be added to this queue (because {@link - * Queue#offer} returns false) then the {@link #close} method of this listener will fail the test. + * BlockingQueue#offer} returns false) then the {@link #close} method of this listener will fail + * the test. * * @see #setQueue * @see #pollMessage */ - public Queue<LogEvent> getQueue() { + public BlockingQueue<LogEvent> getQueue() { return loggerAppender.getQueue(); } /** - * Convinience method for tests that want to assert things about the (formated) message string at + * Convenience method for tests that want to assert things about the (formated) message string at * the head of the queue, w/o needing to know/call methods on the underlying {@link LogEvent} * class. * @@ -373,6 +376,26 @@ public final class LogListener implements Closeable, AutoCloseable { return null == event ? null : event.getMessage().getFormattedMessage(); } + /** + * Convenience method for tests that want to assert things about the (formated) message string at + * the head of the queue, waiting up to the specified timeout for the message to arrive. + * + * @param timeout the duation value + * @param unit the duration unit + * @return the formatted message string of head of the queue, or null if the queue remained empty + * until the specified timeout. + */ + public String pollMessage(long timeout, TimeUnit unit) { + LogEvent event = null; + try { + event = getQueue().poll(timeout, unit); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + fail("Our thread was interrupted while polling the queue."); + } + return null == event ? null : event.getMessage().getFormattedMessage(); + } + /** * The total number of Log events so far processed by this instance, regardless of wether they * have already been removed from the queue, or if they could not be added to the queue due to @@ -598,7 +621,7 @@ public final class LogListener implements Closeable, AutoCloseable { private static final class QueueAppender extends AbstractAppender { // may be mutated in main thread while background thread is actively logging - private final AtomicReference<Queue<LogEvent>> queue = + private final AtomicReference<BlockingQueue<LogEvent>> queue = new AtomicReference<>(new ArrayBlockingQueue<>(100)); final AtomicInteger count = new AtomicInteger(0); final AtomicInteger capacityExceeded = new AtomicInteger(0); @@ -611,7 +634,7 @@ public final class LogListener implements Closeable, AutoCloseable { @Override public void append(final LogEvent event) { - final Queue<LogEvent> q = queue.get(); // read from reference once + final BlockingQueue<LogEvent> q = queue.get(); // read from reference once final LogEvent memento = (event instanceof MutableLogEvent) ? ((MutableLogEvent) event).createMemento() : event; final int currentCount = count.incrementAndGet(); @@ -648,20 +671,20 @@ public final class LogListener implements Closeable, AutoCloseable { * Returns the number of times this appender was unable to queue a LogEvent due to exceeding * capacity * - * @see Queue#offer + * @see BlockingQueue#offer */ public int getNumCapacityExceeded() { return capacityExceeded.get(); } /** Changes the queue that will be used for any future events that are appended */ - public void setQueue(final Queue<LogEvent> q) { + public void setQueue(final BlockingQueue<LogEvent> q) { assert null != q; this.queue.set(q); } /** Returns Raw access to the (current) queue */ - public Queue<LogEvent> getQueue() { + public BlockingQueue<LogEvent> getQueue() { return queue.get(); } }