This is an automated email from the ASF dual-hosted git repository. kturner pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/accumulo.git
commit fca29a9f85bd06f0a81d637a92bb5f4700a26894 Merge: 38d28c233c e3e4c26e65 Author: Keith Turner <ktur...@apache.org> AuthorDate: Fri Aug 11 17:47:23 2023 -0400 Merge branch '2.1' .../accumulo/core/classloader/ClassLoaderUtil.java | 19 ++ .../core/conf/ConfigurationTypeHelper.java | 2 +- .../core/spi/common/ContextClassLoaderFactory.java | 4 +- .../accumulo/server/conf/TableConfiguration.java | 8 + .../org/apache/accumulo/server/util/PropUtil.java | 15 +- .../accumulo/server/util/SystemPropUtil.java | 19 +- .../accumulo/tserver/TabletClientHandler.java | 3 + .../accumulo/tserver/tablet/CompactableImpl.java | 7 + .../tserver/tablet/MinorCompactionTask.java | 9 +- .../org/apache/accumulo/tserver/tablet/Tablet.java | 78 ++++-- .../accumulo/shell/commands/ConfigCommand.java | 31 ++- .../test/functional/HalfClosedTablet2IT.java | 121 +++++++++ .../test/functional/HalfClosedTabletIT.java | 282 +++++++++++++++++++++ .../org/apache/accumulo/test/shell/ShellIT.java | 16 +- .../apache/accumulo/test/shell/ShellServerIT.java | 79 ++++++ 15 files changed, 651 insertions(+), 42 deletions(-) diff --cc core/src/main/java/org/apache/accumulo/core/conf/ConfigurationTypeHelper.java index 9daadb58b4,8893ae8ce9..ef5e0b3b5e --- a/core/src/main/java/org/apache/accumulo/core/conf/ConfigurationTypeHelper.java +++ b/core/src/main/java/org/apache/accumulo/core/conf/ConfigurationTypeHelper.java @@@ -172,8 -173,8 +172,8 @@@ public class ConfigurationTypeHelper try { instance = getClassInstance(context, clazzName, base); - } catch (RuntimeException | IOException | ReflectiveOperationException e) { + } catch (RuntimeException | ReflectiveOperationException e) { - log.warn("Failed to load class {} in classloader context {}", clazzName, context, e); + log.error("Failed to load class {} in classloader context {}", clazzName, context, e); } if (instance == null && defaultInstance != null) { diff --cc core/src/main/java/org/apache/accumulo/core/spi/common/ContextClassLoaderFactory.java index a742ad0a33,3d9c18683f..c0e1863802 --- a/core/src/main/java/org/apache/accumulo/core/spi/common/ContextClassLoaderFactory.java +++ b/core/src/main/java/org/apache/accumulo/core/spi/common/ContextClassLoaderFactory.java @@@ -55,15 -55,16 +55,17 @@@ public interface ContextClassLoaderFact default void init(ContextClassLoaderEnvironment env) {} /** - * Get the class loader for the given contextName. Callers should not cache the ClassLoader result - * as it may change if/when the ClassLoader reloads. Implementations should throw a - * RuntimeException of some type (such as IllegalArgumentException) if the provided contextName is - * not supported or fails to be constructed. + * Get the class loader for the given context. Callers should not cache the ClassLoader result as + * it may change if/when the ClassLoader reloads. Implementations should throw a RuntimeException + * of some type (such as IllegalArgumentException) if the provided context is not supported or + * fails to be constructed. * - * @param contextName the name of the context that represents a class loader that is managed by - * this factory. Currently, Accumulo will only call this method for non-null and non-empty + * @param context the name of the context that represents a class loader that is managed by this - * factory (can be null) ++ * factory. Currently, Accumulo will only call this method for non-null and non-empty + * context. For empty or null context, Accumulo will use the system classloader without + * consulting this plugin. - * @return the class loader for the given contextName + * @return the class loader for the given context */ - ClassLoader getClassLoader(String contextName); + ClassLoader getClassLoader(String context); + } diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/MinorCompactionTask.java index 349e36d43a,f190d79460..ec5db3b815 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/MinorCompactionTask.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/MinorCompactionTask.java @@@ -18,10 -18,7 +18,8 @@@ */ package org.apache.accumulo.tserver.tablet; - import java.io.IOException; - -import org.apache.accumulo.core.metadata.TabletFile; +import org.apache.accumulo.core.file.FilePrefix; +import org.apache.accumulo.core.metadata.ReferencedTabletFile; import org.apache.accumulo.core.metadata.schema.DataFileValue; import org.apache.accumulo.core.trace.TraceUtil; import org.apache.accumulo.tserver.MinorCompactionReason; diff --cc test/src/main/java/org/apache/accumulo/test/functional/HalfClosedTabletIT.java index 0000000000,8f6d907e96..c06a23da72 mode 000000,100644..100644 --- a/test/src/main/java/org/apache/accumulo/test/functional/HalfClosedTabletIT.java +++ b/test/src/main/java/org/apache/accumulo/test/functional/HalfClosedTabletIT.java @@@ -1,0 -1,282 +1,282 @@@ + /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.accumulo.test.functional; + + import static org.junit.jupiter.api.Assertions.assertTrue; + + import java.util.EnumSet; + import java.util.List; + import java.util.Map; + import java.util.concurrent.TimeUnit; + + import org.apache.accumulo.core.client.Accumulo; + import org.apache.accumulo.core.client.AccumuloClient; + import org.apache.accumulo.core.client.AccumuloException; + import org.apache.accumulo.core.client.AccumuloSecurityException; + import org.apache.accumulo.core.client.BatchWriter; + import org.apache.accumulo.core.client.IteratorSetting; + import org.apache.accumulo.core.client.admin.CompactionConfig; + import org.apache.accumulo.core.clientImpl.ClientContext; + import org.apache.accumulo.core.conf.Property; + import org.apache.accumulo.core.data.Mutation; + import org.apache.accumulo.core.data.TableId; + import org.apache.accumulo.core.data.Value; + import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope; + import org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType; + import org.apache.accumulo.core.metadata.schema.TabletsMetadata; + import org.apache.accumulo.core.util.UtilWaitThread; + import org.apache.accumulo.harness.MiniClusterConfigurationCallback; + import org.apache.accumulo.harness.SharedMiniClusterBase; + import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl; + import org.apache.accumulo.server.ServerContext; + import org.apache.accumulo.server.conf.store.TablePropKey; + import org.apache.accumulo.test.util.Wait; + import org.apache.hadoop.conf.Configuration; + import org.apache.hadoop.io.Text; + import org.junit.jupiter.api.AfterAll; + import org.junit.jupiter.api.BeforeAll; + import org.junit.jupiter.api.Test; + + import com.google.common.collect.Sets; + + // This IT tests the cases seen in https://github.com/apache/accumulo/issues/3674 + // where a failing minor compaction causes a Tablet.initiateClose to leave the + // Tablet in a half-closed state. The half-closed Tablet cannot be unloaded and + // the TabletServer cannot be shutdown normally. Because the minor compaction has + // been failing the Tablet needs to be recovered when it's ultimately re-assigned. + // + public class HalfClosedTabletIT extends SharedMiniClusterBase { + + public static class HalfClosedTabletITConfiguration implements MiniClusterConfigurationCallback { + + @Override + public void configureMiniCluster(MiniAccumuloConfigImpl cfg, Configuration coreSite) { + cfg.setNumTservers(1); + } + + } + + @BeforeAll + public static void startup() throws Exception { + SharedMiniClusterBase.startMiniClusterWithConfig(new HalfClosedTabletITConfiguration()); + } + + @AfterAll + public static void shutdown() throws Exception { + SharedMiniClusterBase.stopMiniCluster(); + } + + @Test + public void testSplitWithInvalidContext() throws Exception { + + // In this scenario the table has been mis-configured with an invalid context name. + // The minor compaction task is failing because classes like the volume chooser or + // user iterators cannot be loaded. The user calls Tablet.split which calls initiateClose. + // This test ensures that the Tablet can still be unloaded normally by taking if offline + // after the split call with an invalid context. The context property is removed after the + // split call below to get the minor compaction task to succeed on a subsequent run. Because + // the minor compaction task backs off when retrying, this could take some time. + + String tableName = getUniqueNames(1)[0]; + + try (final var client = Accumulo.newClient().from(getClientProps()).build()) { + final var tops = client.tableOperations(); + tops.create(tableName); + TableId tableId = TableId.of(tops.tableIdMap().get(tableName)); + try (final var bw = client.createBatchWriter(tableName)) { + final var m1 = new Mutation("a"); + final var m2 = new Mutation("b"); + m1.put(new Text("cf"), new Text(), new Value()); + m2.put(new Text("cf"), new Text(), new Value()); + bw.addMutation(m1); + bw.addMutation(m2); + } + + setInvalidClassLoaderContextPropertyWithoutValidation(getCluster().getServerContext(), + tableId); + + // Need to wait for TabletServer to pickup configuration change + Thread.sleep(3000); + + Thread configFixer = new Thread(() -> { + UtilWaitThread.sleep(3000); + removeInvalidClassLoaderContextProperty(client, tableName); + }); + + long t1 = System.nanoTime(); + configFixer.start(); + + // The split will probably start running w/ bad config that will cause it to get stuck. + // However once the config is fixed by the background thread it should continue. + tops.addSplits(tableName, Sets.newTreeSet(List.of(new Text("b")))); + + long t2 = System.nanoTime(); + // expect that split took at least 3 seconds because that is the time it takes to fix the + // config + assertTrue(TimeUnit.NANOSECONDS.toMillis(t2 - t1) >= 3000); + + // offline the table which will unload the tablets. If the context property is not + // removed above, then this test will fail because the tablets will not be able to be + // unloaded + tops.offline(tableName); + + Wait.waitFor(() -> countHostedTablets(client, tableId) == 0L, 340_000); + } + } + + @Test + public void testIteratorThrowingTransientError() throws Exception { + + // In this scenario a minc iterator throws an error some number of time, then + // succeeds. We want to verify that the minc is being retried and the tablet + // can be closed. + + try (AccumuloClient c = Accumulo.newClient().from(getClientProps()).build()) { + + String tableName = getUniqueNames(1)[0]; + final var tops = c.tableOperations(); + + tops.create(tableName); + final var tid = TableId.of(tops.tableIdMap().get(tableName)); + + try (BatchWriter bw = c.createBatchWriter(tableName)) { + Mutation m = new Mutation(new Text("r1")); + m.put("acf", tableName, "1"); + bw.addMutation(m); + } + + IteratorSetting setting = new IteratorSetting(50, "error", ErrorThrowingIterator.class); + setting.addOption(ErrorThrowingIterator.TIMES, "3"); + c.tableOperations().attachIterator(tableName, setting, EnumSet.of(IteratorScope.minc)); + c.tableOperations().compact(tableName, new CompactionConfig().setWait(true).setFlush(true)); + + // Taking the table offline should succeed normally + tops.offline(tableName); + + // Minc should have completed successfully + Wait.waitFor(() -> tabletHasExpectedRFiles(c, tableName, 1, 1, 1, 1), 340_000); + + Wait.waitFor(() -> countHostedTablets(c, tid) == 0L, 340_000); + + } + } + + // Note that these tests can talk several minutes each because by the time the test + // code changes the configuration, the minc has failed so many times that the minc + // is waiting for a few minutes before trying again. For example, I saw this backoff + // timing: + // + // DEBUG: MinC failed sleeping 169 ms before retrying + // DEBUG: MinC failed sleeping 601 ms before retrying + // DEBUG: MinC failed sleeping 2004 ms before retrying + // DEBUG: MinC failed sleeping 11891 ms before retrying + // DEBUG: MinC failed sleeping 43156 ms before retrying + // DEBUG: MinC failed sleeping 179779 ms before retrying + @Test + public void testBadIteratorOnStack() throws Exception { + + // In this scenario the table is using an iterator for minc that is throwing an exception. + // This test ensures that the Tablet can still be unloaded normally by taking if offline + // after the bad iterator has been removed from the minc configuration. + + try (AccumuloClient c = Accumulo.newClient().from(getClientProps()).build()) { + + String tableName = getUniqueNames(1)[0]; + final var tops = c.tableOperations(); + + tops.create(tableName); + final var tid = TableId.of(tops.tableIdMap().get(tableName)); + + IteratorSetting is = new IteratorSetting(30, BadIterator.class); + c.tableOperations().attachIterator(tableName, is, EnumSet.of(IteratorScope.minc)); + + try (BatchWriter bw = c.createBatchWriter(tableName)) { + Mutation m = new Mutation(new Text("r1")); + m.put("acf", tableName, "1"); + bw.addMutation(m); + } + + c.tableOperations().flush(tableName, null, null, false); + - UtilWaitThread.sleepUninterruptibly(5, TimeUnit.SECONDS); ++ UtilWaitThread.sleep(5000); + + // minc should fail, so there should be no files + FunctionalTestUtils.checkRFiles(c, tableName, 1, 1, 0, 0); + + // tell the server to take the table offline + tops.offline(tableName); + + // The offine operation should not be able to complete because the tablet can not minor + // compact, give the offline operation a bit of time to attempt to complete even though it + // should never be able to complete. - UtilWaitThread.sleepUninterruptibly(5, TimeUnit.SECONDS); ++ UtilWaitThread.sleep(5000); + + assertTrue(countHostedTablets(c, tid) > 0); + + // minc should fail, so there should be no files + FunctionalTestUtils.checkRFiles(c, tableName, 1, 1, 0, 0); + + // remove the bad iterator. The failing minc task is in a backoff retry loop + // and should pick up this change on the next try + c.tableOperations().removeIterator(tableName, BadIterator.class.getSimpleName(), + EnumSet.of(IteratorScope.minc)); + + // Minc should have completed successfully + Wait.waitFor(() -> tabletHasExpectedRFiles(c, tableName, 1, 1, 1, 1), 340_000); + + // The previous operation to offline the table should be able to succeed after the minor + // compaction completed + Wait.waitFor(() -> countHostedTablets(c, tid) == 0L, 340_000); + } + } + + public static void setInvalidClassLoaderContextPropertyWithoutValidation(ServerContext context, + TableId tableId) { + TablePropKey key = TablePropKey.of(context, tableId); + context.getPropStore().putAll(key, + Map.of(Property.TABLE_CLASSLOADER_CONTEXT.getKey(), "invalid")); + } + + public static void removeInvalidClassLoaderContextProperty(AccumuloClient client, + String tableName) { + try { + client.tableOperations().removeProperty(tableName, + Property.TABLE_CLASSLOADER_CONTEXT.getKey()); + } catch (AccumuloException | AccumuloSecurityException e) { + throw new RuntimeException(e); + } + } + + public static boolean tabletHasExpectedRFiles(AccumuloClient c, String tableName, int minTablets, + int maxTablets, int minRFiles, int maxRFiles) { + try { + FunctionalTestUtils.checkRFiles(c, tableName, minTablets, maxTablets, minRFiles, maxRFiles); + return true; + } catch (Exception e) { + return false; + } + } + + public static long countHostedTablets(AccumuloClient c, TableId tid) { + try (TabletsMetadata tm = ((ClientContext) c).getAmple().readTablets().forTable(tid) + .fetch(ColumnType.LOCATION).build()) { + return tm.stream().count(); + } + } + }