This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git

commit fca29a9f85bd06f0a81d637a92bb5f4700a26894
Merge: 38d28c233c e3e4c26e65
Author: Keith Turner <ktur...@apache.org>
AuthorDate: Fri Aug 11 17:47:23 2023 -0400

    Merge branch '2.1'

 .../accumulo/core/classloader/ClassLoaderUtil.java |  19 ++
 .../core/conf/ConfigurationTypeHelper.java         |   2 +-
 .../core/spi/common/ContextClassLoaderFactory.java |   4 +-
 .../accumulo/server/conf/TableConfiguration.java   |   8 +
 .../org/apache/accumulo/server/util/PropUtil.java  |  15 +-
 .../accumulo/server/util/SystemPropUtil.java       |  19 +-
 .../accumulo/tserver/TabletClientHandler.java      |   3 +
 .../accumulo/tserver/tablet/CompactableImpl.java   |   7 +
 .../tserver/tablet/MinorCompactionTask.java        |   9 +-
 .../org/apache/accumulo/tserver/tablet/Tablet.java |  78 ++++--
 .../accumulo/shell/commands/ConfigCommand.java     |  31 ++-
 .../test/functional/HalfClosedTablet2IT.java       | 121 +++++++++
 .../test/functional/HalfClosedTabletIT.java        | 282 +++++++++++++++++++++
 .../org/apache/accumulo/test/shell/ShellIT.java    |  16 +-
 .../apache/accumulo/test/shell/ShellServerIT.java  |  79 ++++++
 15 files changed, 651 insertions(+), 42 deletions(-)

diff --cc 
core/src/main/java/org/apache/accumulo/core/conf/ConfigurationTypeHelper.java
index 9daadb58b4,8893ae8ce9..ef5e0b3b5e
--- 
a/core/src/main/java/org/apache/accumulo/core/conf/ConfigurationTypeHelper.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/conf/ConfigurationTypeHelper.java
@@@ -172,8 -173,8 +172,8 @@@ public class ConfigurationTypeHelper 
  
      try {
        instance = getClassInstance(context, clazzName, base);
 -    } catch (RuntimeException | IOException | ReflectiveOperationException e) 
{
 +    } catch (RuntimeException | ReflectiveOperationException e) {
-       log.warn("Failed to load class {} in classloader context {}", 
clazzName, context, e);
+       log.error("Failed to load class {} in classloader context {}", 
clazzName, context, e);
      }
  
      if (instance == null && defaultInstance != null) {
diff --cc 
core/src/main/java/org/apache/accumulo/core/spi/common/ContextClassLoaderFactory.java
index a742ad0a33,3d9c18683f..c0e1863802
--- 
a/core/src/main/java/org/apache/accumulo/core/spi/common/ContextClassLoaderFactory.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/spi/common/ContextClassLoaderFactory.java
@@@ -55,15 -55,16 +55,17 @@@ public interface ContextClassLoaderFact
    default void init(ContextClassLoaderEnvironment env) {}
  
    /**
 -   * Get the class loader for the given contextName. Callers should not cache 
the ClassLoader result
 -   * as it may change if/when the ClassLoader reloads. Implementations should 
throw a
 -   * RuntimeException of some type (such as IllegalArgumentException) if the 
provided contextName is
 -   * not supported or fails to be constructed.
 +   * Get the class loader for the given context. Callers should not cache the 
ClassLoader result as
 +   * it may change if/when the ClassLoader reloads. Implementations should 
throw a RuntimeException
 +   * of some type (such as IllegalArgumentException) if the provided context 
is not supported or
 +   * fails to be constructed.
     *
 -   * @param contextName the name of the context that represents a class 
loader that is managed by
 -   *        this factory. Currently, Accumulo will only call this method for 
non-null and non-empty
 +   * @param context the name of the context that represents a class loader 
that is managed by this
-    *        factory (can be null)
++   *        factory. Currently, Accumulo will only call this method for 
non-null and non-empty
+    *        context. For empty or null context, Accumulo will use the system 
classloader without
+    *        consulting this plugin.
 -   * @return the class loader for the given contextName
 +   * @return the class loader for the given context
     */
 -  ClassLoader getClassLoader(String contextName);
 +  ClassLoader getClassLoader(String context);
 +
  }
diff --cc 
server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/MinorCompactionTask.java
index 349e36d43a,f190d79460..ec5db3b815
--- 
a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/MinorCompactionTask.java
+++ 
b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/MinorCompactionTask.java
@@@ -18,10 -18,7 +18,8 @@@
   */
  package org.apache.accumulo.tserver.tablet;
  
- import java.io.IOException;
- 
 -import org.apache.accumulo.core.metadata.TabletFile;
 +import org.apache.accumulo.core.file.FilePrefix;
 +import org.apache.accumulo.core.metadata.ReferencedTabletFile;
  import org.apache.accumulo.core.metadata.schema.DataFileValue;
  import org.apache.accumulo.core.trace.TraceUtil;
  import org.apache.accumulo.tserver.MinorCompactionReason;
diff --cc 
test/src/main/java/org/apache/accumulo/test/functional/HalfClosedTabletIT.java
index 0000000000,8f6d907e96..c06a23da72
mode 000000,100644..100644
--- 
a/test/src/main/java/org/apache/accumulo/test/functional/HalfClosedTabletIT.java
+++ 
b/test/src/main/java/org/apache/accumulo/test/functional/HalfClosedTabletIT.java
@@@ -1,0 -1,282 +1,282 @@@
+ /*
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements.  See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership.  The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License.  You may obtain a copy of the License at
+  *
+  *   https://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied.  See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+ package org.apache.accumulo.test.functional;
+ 
+ import static org.junit.jupiter.api.Assertions.assertTrue;
+ 
+ import java.util.EnumSet;
+ import java.util.List;
+ import java.util.Map;
+ import java.util.concurrent.TimeUnit;
+ 
+ import org.apache.accumulo.core.client.Accumulo;
+ import org.apache.accumulo.core.client.AccumuloClient;
+ import org.apache.accumulo.core.client.AccumuloException;
+ import org.apache.accumulo.core.client.AccumuloSecurityException;
+ import org.apache.accumulo.core.client.BatchWriter;
+ import org.apache.accumulo.core.client.IteratorSetting;
+ import org.apache.accumulo.core.client.admin.CompactionConfig;
+ import org.apache.accumulo.core.clientImpl.ClientContext;
+ import org.apache.accumulo.core.conf.Property;
+ import org.apache.accumulo.core.data.Mutation;
+ import org.apache.accumulo.core.data.TableId;
+ import org.apache.accumulo.core.data.Value;
+ import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
+ import org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType;
+ import org.apache.accumulo.core.metadata.schema.TabletsMetadata;
+ import org.apache.accumulo.core.util.UtilWaitThread;
+ import org.apache.accumulo.harness.MiniClusterConfigurationCallback;
+ import org.apache.accumulo.harness.SharedMiniClusterBase;
+ import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl;
+ import org.apache.accumulo.server.ServerContext;
+ import org.apache.accumulo.server.conf.store.TablePropKey;
+ import org.apache.accumulo.test.util.Wait;
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.io.Text;
+ import org.junit.jupiter.api.AfterAll;
+ import org.junit.jupiter.api.BeforeAll;
+ import org.junit.jupiter.api.Test;
+ 
+ import com.google.common.collect.Sets;
+ 
+ // This IT tests the cases seen in 
https://github.com/apache/accumulo/issues/3674
+ // where a failing minor compaction causes a Tablet.initiateClose to leave the
+ // Tablet in a half-closed state. The half-closed Tablet cannot be unloaded 
and
+ // the TabletServer cannot be shutdown normally. Because the minor compaction 
has
+ // been failing the Tablet needs to be recovered when it's ultimately 
re-assigned.
+ //
+ public class HalfClosedTabletIT extends SharedMiniClusterBase {
+ 
+   public static class HalfClosedTabletITConfiguration implements 
MiniClusterConfigurationCallback {
+ 
+     @Override
+     public void configureMiniCluster(MiniAccumuloConfigImpl cfg, 
Configuration coreSite) {
+       cfg.setNumTservers(1);
+     }
+ 
+   }
+ 
+   @BeforeAll
+   public static void startup() throws Exception {
+     SharedMiniClusterBase.startMiniClusterWithConfig(new 
HalfClosedTabletITConfiguration());
+   }
+ 
+   @AfterAll
+   public static void shutdown() throws Exception {
+     SharedMiniClusterBase.stopMiniCluster();
+   }
+ 
+   @Test
+   public void testSplitWithInvalidContext() throws Exception {
+ 
+     // In this scenario the table has been mis-configured with an invalid 
context name.
+     // The minor compaction task is failing because classes like the volume 
chooser or
+     // user iterators cannot be loaded. The user calls Tablet.split which 
calls initiateClose.
+     // This test ensures that the Tablet can still be unloaded normally by 
taking if offline
+     // after the split call with an invalid context. The context property is 
removed after the
+     // split call below to get the minor compaction task to succeed on a 
subsequent run. Because
+     // the minor compaction task backs off when retrying, this could take 
some time.
+ 
+     String tableName = getUniqueNames(1)[0];
+ 
+     try (final var client = 
Accumulo.newClient().from(getClientProps()).build()) {
+       final var tops = client.tableOperations();
+       tops.create(tableName);
+       TableId tableId = TableId.of(tops.tableIdMap().get(tableName));
+       try (final var bw = client.createBatchWriter(tableName)) {
+         final var m1 = new Mutation("a");
+         final var m2 = new Mutation("b");
+         m1.put(new Text("cf"), new Text(), new Value());
+         m2.put(new Text("cf"), new Text(), new Value());
+         bw.addMutation(m1);
+         bw.addMutation(m2);
+       }
+ 
+       
setInvalidClassLoaderContextPropertyWithoutValidation(getCluster().getServerContext(),
+           tableId);
+ 
+       // Need to wait for TabletServer to pickup configuration change
+       Thread.sleep(3000);
+ 
+       Thread configFixer = new Thread(() -> {
+         UtilWaitThread.sleep(3000);
+         removeInvalidClassLoaderContextProperty(client, tableName);
+       });
+ 
+       long t1 = System.nanoTime();
+       configFixer.start();
+ 
+       // The split will probably start running w/ bad config that will cause 
it to get stuck.
+       // However once the config is fixed by the background thread it should 
continue.
+       tops.addSplits(tableName, Sets.newTreeSet(List.of(new Text("b"))));
+ 
+       long t2 = System.nanoTime();
+       // expect that split took at least 3 seconds because that is the time 
it takes to fix the
+       // config
+       assertTrue(TimeUnit.NANOSECONDS.toMillis(t2 - t1) >= 3000);
+ 
+       // offline the table which will unload the tablets. If the context 
property is not
+       // removed above, then this test will fail because the tablets will not 
be able to be
+       // unloaded
+       tops.offline(tableName);
+ 
+       Wait.waitFor(() -> countHostedTablets(client, tableId) == 0L, 340_000);
+     }
+   }
+ 
+   @Test
+   public void testIteratorThrowingTransientError() throws Exception {
+ 
+     // In this scenario a minc iterator throws an error some number of time, 
then
+     // succeeds. We want to verify that the minc is being retried and the 
tablet
+     // can be closed.
+ 
+     try (AccumuloClient c = 
Accumulo.newClient().from(getClientProps()).build()) {
+ 
+       String tableName = getUniqueNames(1)[0];
+       final var tops = c.tableOperations();
+ 
+       tops.create(tableName);
+       final var tid = TableId.of(tops.tableIdMap().get(tableName));
+ 
+       try (BatchWriter bw = c.createBatchWriter(tableName)) {
+         Mutation m = new Mutation(new Text("r1"));
+         m.put("acf", tableName, "1");
+         bw.addMutation(m);
+       }
+ 
+       IteratorSetting setting = new IteratorSetting(50, "error", 
ErrorThrowingIterator.class);
+       setting.addOption(ErrorThrowingIterator.TIMES, "3");
+       c.tableOperations().attachIterator(tableName, setting, 
EnumSet.of(IteratorScope.minc));
+       c.tableOperations().compact(tableName, new 
CompactionConfig().setWait(true).setFlush(true));
+ 
+       // Taking the table offline should succeed normally
+       tops.offline(tableName);
+ 
+       // Minc should have completed successfully
+       Wait.waitFor(() -> tabletHasExpectedRFiles(c, tableName, 1, 1, 1, 1), 
340_000);
+ 
+       Wait.waitFor(() -> countHostedTablets(c, tid) == 0L, 340_000);
+ 
+     }
+   }
+ 
+   // Note that these tests can talk several minutes each because by the time 
the test
+   // code changes the configuration, the minc has failed so many times that 
the minc
+   // is waiting for a few minutes before trying again. For example, I saw 
this backoff
+   // timing:
+   //
+   // DEBUG: MinC failed sleeping 169 ms before retrying
+   // DEBUG: MinC failed sleeping 601 ms before retrying
+   // DEBUG: MinC failed sleeping 2004 ms before retrying
+   // DEBUG: MinC failed sleeping 11891 ms before retrying
+   // DEBUG: MinC failed sleeping 43156 ms before retrying
+   // DEBUG: MinC failed sleeping 179779 ms before retrying
+   @Test
+   public void testBadIteratorOnStack() throws Exception {
+ 
+     // In this scenario the table is using an iterator for minc that is 
throwing an exception.
+     // This test ensures that the Tablet can still be unloaded normally by 
taking if offline
+     // after the bad iterator has been removed from the minc configuration.
+ 
+     try (AccumuloClient c = 
Accumulo.newClient().from(getClientProps()).build()) {
+ 
+       String tableName = getUniqueNames(1)[0];
+       final var tops = c.tableOperations();
+ 
+       tops.create(tableName);
+       final var tid = TableId.of(tops.tableIdMap().get(tableName));
+ 
+       IteratorSetting is = new IteratorSetting(30, BadIterator.class);
+       c.tableOperations().attachIterator(tableName, is, 
EnumSet.of(IteratorScope.minc));
+ 
+       try (BatchWriter bw = c.createBatchWriter(tableName)) {
+         Mutation m = new Mutation(new Text("r1"));
+         m.put("acf", tableName, "1");
+         bw.addMutation(m);
+       }
+ 
+       c.tableOperations().flush(tableName, null, null, false);
+ 
 -      UtilWaitThread.sleepUninterruptibly(5, TimeUnit.SECONDS);
++      UtilWaitThread.sleep(5000);
+ 
+       // minc should fail, so there should be no files
+       FunctionalTestUtils.checkRFiles(c, tableName, 1, 1, 0, 0);
+ 
+       // tell the server to take the table offline
+       tops.offline(tableName);
+ 
+       // The offine operation should not be able to complete because the 
tablet can not minor
+       // compact, give the offline operation a bit of time to attempt to 
complete even though it
+       // should never be able to complete.
 -      UtilWaitThread.sleepUninterruptibly(5, TimeUnit.SECONDS);
++      UtilWaitThread.sleep(5000);
+ 
+       assertTrue(countHostedTablets(c, tid) > 0);
+ 
+       // minc should fail, so there should be no files
+       FunctionalTestUtils.checkRFiles(c, tableName, 1, 1, 0, 0);
+ 
+       // remove the bad iterator. The failing minc task is in a backoff retry 
loop
+       // and should pick up this change on the next try
+       c.tableOperations().removeIterator(tableName, 
BadIterator.class.getSimpleName(),
+           EnumSet.of(IteratorScope.minc));
+ 
+       // Minc should have completed successfully
+       Wait.waitFor(() -> tabletHasExpectedRFiles(c, tableName, 1, 1, 1, 1), 
340_000);
+ 
+       // The previous operation to offline the table should be able to 
succeed after the minor
+       // compaction completed
+       Wait.waitFor(() -> countHostedTablets(c, tid) == 0L, 340_000);
+     }
+   }
+ 
+   public static void 
setInvalidClassLoaderContextPropertyWithoutValidation(ServerContext context,
+       TableId tableId) {
+     TablePropKey key = TablePropKey.of(context, tableId);
+     context.getPropStore().putAll(key,
+         Map.of(Property.TABLE_CLASSLOADER_CONTEXT.getKey(), "invalid"));
+   }
+ 
+   public static void removeInvalidClassLoaderContextProperty(AccumuloClient 
client,
+       String tableName) {
+     try {
+       client.tableOperations().removeProperty(tableName,
+           Property.TABLE_CLASSLOADER_CONTEXT.getKey());
+     } catch (AccumuloException | AccumuloSecurityException e) {
+       throw new RuntimeException(e);
+     }
+   }
+ 
+   public static boolean tabletHasExpectedRFiles(AccumuloClient c, String 
tableName, int minTablets,
+       int maxTablets, int minRFiles, int maxRFiles) {
+     try {
+       FunctionalTestUtils.checkRFiles(c, tableName, minTablets, maxTablets, 
minRFiles, maxRFiles);
+       return true;
+     } catch (Exception e) {
+       return false;
+     }
+   }
+ 
+   public static long countHostedTablets(AccumuloClient c, TableId tid) {
+     try (TabletsMetadata tm = ((ClientContext) 
c).getAmple().readTablets().forTable(tid)
+         .fetch(ColumnType.LOCATION).build()) {
+       return tm.stream().count();
+     }
+   }
+ }

Reply via email to