Repository: phoenix Updated Branches: refs/heads/master 3477977f3 -> d541d6f28
PHOENIX-4027 Mark index as disabled during partial rebuild after configurable amount of time Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/d541d6f2 Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/d541d6f2 Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/d541d6f2 Branch: refs/heads/master Commit: d541d6f2875a590580e8ccf05f26795083b06658 Parents: 3477977 Author: Samarth Jain <[email protected]> Authored: Fri Jul 14 17:56:16 2017 -0700 Committer: Samarth Jain <[email protected]> Committed: Fri Jul 14 17:56:16 2017 -0700 ---------------------------------------------------------------------- .../phoenix/end2end/PhoenixRuntimeIT.java | 6 -- .../end2end/index/MutableIndexFailureIT.java | 3 + .../coprocessor/MetaDataRegionObserver.java | 60 +++++++++++--------- .../org/apache/phoenix/query/QueryServices.java | 2 + .../phoenix/query/QueryServicesOptions.java | 4 +- 5 files changed, 41 insertions(+), 34 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/phoenix/blob/d541d6f2/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixRuntimeIT.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixRuntimeIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixRuntimeIT.java index 1109070..95ab906 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixRuntimeIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixRuntimeIT.java @@ -187,9 +187,6 @@ public class PhoenixRuntimeIT extends ParallelStatsDisabledIT { assertEquals( Long.toString(QueryServicesOptions.DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER), rebuildQueryServicesConfig.get(HConstants.HBASE_CLIENT_RETRIES_NUMBER)); - assertEquals( - Long.toString(QueryServicesOptions.DEFAULT_INDEX_REBULD_RPC_RETRY_PAUSE), - rebuildQueryServicesConfig.get(HConstants.HBASE_CLIENT_PAUSE)); ConnectionQueryServices rebuildQueryServices = rebuildIndexConnection.getQueryServices(); HConnection rebuildIndexHConnection = (HConnection) Whitebox.getInternalState(rebuildQueryServices, @@ -212,9 +209,6 @@ public class PhoenixRuntimeIT extends ParallelStatsDisabledIT { assertEquals( Long.toString(QueryServicesOptions.DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER), rebuildHConnectionConfig.get(HConstants.HBASE_CLIENT_RETRIES_NUMBER)); - assertEquals( - Long.toString(QueryServicesOptions.DEFAULT_INDEX_REBULD_RPC_RETRY_PAUSE), - rebuildHConnectionConfig.get(HConstants.HBASE_CLIENT_PAUSE)); } } } http://git-wip-us.apache.org/repos/asf/phoenix/blob/d541d6f2/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/MutableIndexFailureIT.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/MutableIndexFailureIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/MutableIndexFailureIT.java index f47707b..8e2564d 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/MutableIndexFailureIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/MutableIndexFailureIT.java @@ -127,6 +127,9 @@ public class MutableIndexFailureIT extends BaseTest { serverProps.put("hbase.balancer.period", String.valueOf(Integer.MAX_VALUE)); serverProps.put(QueryServices.INDEX_FAILURE_HANDLING_REBUILD_ATTRIB, Boolean.TRUE.toString()); serverProps.put(QueryServices.INDEX_FAILURE_HANDLING_REBUILD_INTERVAL_ATTRIB, "4000"); + serverProps.put(QueryServices.INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD, "30000"); // give up rebuilding after 30 seconds + // need to override rpc retries otherwise test doesn't pass + serverProps.put(QueryServices.INDEX_REBUILD_RPC_RETRIES_COUNTER, Long.toString(1)); Map<String, String> clientProps = Collections.singletonMap(QueryServices.TRANSACTIONS_ENABLED, Boolean.TRUE.toString()); NUM_SLAVES_BASE = 4; setUpTestDriver(new ReadOnlyProps(serverProps.entrySet().iterator()), new ReadOnlyProps(clientProps.entrySet().iterator())); http://git-wip-us.apache.org/repos/asf/phoenix/blob/d541d6f2/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataRegionObserver.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataRegionObserver.java b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataRegionObserver.java index 4c63587..5cfacfc 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataRegionObserver.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataRegionObserver.java @@ -76,8 +76,10 @@ import org.apache.phoenix.schema.MetaDataClient; import org.apache.phoenix.schema.PIndexState; import org.apache.phoenix.schema.PName; import org.apache.phoenix.schema.PTable; +import org.apache.phoenix.schema.SortOrder; import org.apache.phoenix.schema.TableNotFoundException; import org.apache.phoenix.schema.TableRef; +import org.apache.phoenix.schema.types.PChar; import org.apache.phoenix.schema.types.PLong; import org.apache.phoenix.util.ByteUtil; import org.apache.phoenix.util.MetaDataUtil; @@ -105,7 +107,6 @@ public class MetaDataRegionObserver extends BaseRegionObserver { private boolean enableRebuildIndex = QueryServicesOptions.DEFAULT_INDEX_FAILURE_HANDLING_REBUILD; private long rebuildIndexTimeInterval = QueryServicesOptions.DEFAULT_INDEX_FAILURE_HANDLING_REBUILD_INTERVAL; private static Map<PName, Long> batchExecutedPerTableMap = new HashMap<PName, Long>(); - @GuardedBy("MetaDataRegionObserver.class") private static Properties rebuildIndexConnectionProps; @@ -194,8 +195,7 @@ public class MetaDataRegionObserver extends BaseRegionObserver { initRebuildIndexConnectionProps(e.getEnvironment().getConfiguration()); // starts index rebuild schedule work BuildIndexScheduleTask task = new BuildIndexScheduleTask(e.getEnvironment()); - // run scheduled task every 10 secs - executor.scheduleAtFixedRate(task, 10000, rebuildIndexTimeInterval, TimeUnit.MILLISECONDS); + executor.scheduleWithFixedDelay(task, 10000, rebuildIndexTimeInterval, TimeUnit.MILLISECONDS); } catch (ClassNotFoundException ex) { LOG.error("BuildIndexScheduleTask cannot start!", ex); } @@ -212,6 +212,7 @@ public class MetaDataRegionObserver extends BaseRegionObserver { RegionCoprocessorEnvironment env; private long rebuildIndexBatchSize = HConstants.LATEST_TIMESTAMP; private long configuredBatches = 10; + private long indexDisableTimestampThreshold; public BuildIndexScheduleTask(RegionCoprocessorEnvironment env) { this.env = env; @@ -220,6 +221,9 @@ public class MetaDataRegionObserver extends BaseRegionObserver { QueryServices.INDEX_FAILURE_HANDLING_REBUILD_PERIOD, HConstants.LATEST_TIMESTAMP); this.configuredBatches = configuration.getLong( QueryServices.INDEX_FAILURE_HANDLING_REBUILD_NUMBER_OF_BATCHES_PER_TABLE, configuredBatches); + this.indexDisableTimestampThreshold = + configuration.getLong(QueryServices.INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD, + QueryServicesOptions.DEFAULT_INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD); } @Override @@ -309,6 +313,32 @@ public class MetaDataRegionObserver extends BaseRegionObserver { + indexPTable.getName() + " are online."); continue; } + long indexDisableTimestamp = + PLong.INSTANCE.getCodec().decodeLong(disabledTimeStamp, 0, + SortOrder.ASC); + PIndexState state = PIndexState.fromSerializedValue(indexState[0]); + if (indexDisableTimestamp > 0 && System.currentTimeMillis() + - indexDisableTimestamp > indexDisableTimestampThreshold) { + /* + * It has been too long since the index has been disabled and any future + * attempts to reenable it likely will fail. So we are going to mark the + * index as disabled and set the index disable timestamp to 0 so that the + * rebuild task won't pick up this index again for rebuild. + */ + try { + updateIndexState(conn, indexTableFullName, env, state, + PIndexState.DISABLE, 0l); + LOG.error("Unable to rebuild index " + indexTableFullName + + ". Won't attempt again since index disable timestamp is older than current time by " + + indexDisableTimestampThreshold + + " milliseconds. Manual intervention needed to re-build the index"); + } catch (Throwable ex) { + LOG.error( + "Unable to mark index " + indexTableFullName + " as disabled.", ex); + } + continue; // don't attempt another rebuild irrespective of whether + // updateIndexState worked or not + } // Allow index to begin incremental maintenance as index is back online and we // cannot transition directly from DISABLED -> ACTIVE if (Bytes.compareTo(PIndexState.DISABLE.getSerializedBytes(), indexState) == 0) { @@ -430,24 +460,7 @@ public class MetaDataRegionObserver extends BaseRegionObserver { } } } catch (Exception e) { - for (PTable index : indexesToPartiallyRebuild) { - String indexTableFullName = SchemaUtil.getTableName( - index.getSchemaName().getString(), - index.getTableName().getString()); - try { - /* - * We are going to mark the index as disabled and set the index - * disable timestamp to 0 so that the rebuild task won't pick up - * this index again for rebuild. - */ - updateIndexState(conn, indexTableFullName, env, - PIndexState.INACTIVE, PIndexState.DISABLE, 0l); - } catch (Throwable ex) { - LOG.error("Unable to mark index " + indexTableFullName + " as disabled after rebuilding it failed", ex); - } - } - LOG.error("Unable to rebuild " + dataPTable + " indexes " + indexesToPartiallyRebuild - + ". Won't attempt again. Manual intervention needed to re-build the index", e); + LOG.error("Unable to rebuild " + dataPTable + " indexes " + indexesToPartiallyRebuild, e); } } } @@ -550,9 +563,6 @@ public class MetaDataRegionObserver extends BaseRegionObserver { int indexRebuildRpcRetriesCounter = config.getInt(QueryServices.INDEX_REBUILD_RPC_RETRIES_COUNTER, QueryServicesOptions.DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER); - long indexRebuildRpcRetryPauseTimeMs = - config.getLong(QueryServices.INDEX_REBUILD_RPC_RETRY_PAUSE_TIME, - QueryServicesOptions.DEFAULT_INDEX_REBULD_RPC_RETRY_PAUSE); // Set SCN so that we don't ping server and have the upper bound set back to // the timestamp when the failure occurred. props.setProperty(PhoenixRuntime.CURRENT_SCN_ATTRIB, Long.toString(Long.MAX_VALUE)); @@ -565,8 +575,6 @@ public class MetaDataRegionObserver extends BaseRegionObserver { Long.toString(indexRebuildRPCTimeoutMs)); props.setProperty(HConstants.HBASE_CLIENT_RETRIES_NUMBER, Long.toString(indexRebuildRpcRetriesCounter)); - props.setProperty(HConstants.HBASE_CLIENT_PAUSE, - Long.toString(indexRebuildRpcRetryPauseTimeMs)); // don't run a second index populations upsert select props.setProperty(QueryServices.INDEX_POPULATION_SLEEP_TIME, "0"); rebuildIndexConnectionProps = PropertiesUtil.combineProperties(props, config); http://git-wip-us.apache.org/repos/asf/phoenix/blob/d541d6f2/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java index a0575d9..57aba16 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java @@ -142,6 +142,8 @@ public interface QueryServices extends SQLCloseable { "phoenix.index.failure.handling.rebuild.interval"; public static final String INDEX_FAILURE_HANDLING_REBUILD_NUMBER_OF_BATCHES_PER_TABLE = "phoenix.index.rebuild.batch.perTable"; + // If index disable timestamp is older than this threshold, then index rebuild task won't attempt to rebuild it + public static final String INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD = "phoenix.index.rebuild.disabletimestamp.threshold"; // Block writes to data table when index write fails public static final String INDEX_FAILURE_BLOCK_WRITE = "phoenix.index.failure.block.write"; http://git-wip-us.apache.org/repos/asf/phoenix/blob/d541d6f2/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java index e6be091..21d8300 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java @@ -185,8 +185,8 @@ public class QueryServicesOptions { public static final long DEFAULT_INDEX_REBUILD_QUERY_TIMEOUT = 30000 * 60; // 30 mins public static final long DEFAULT_INDEX_REBUILD_RPC_TIMEOUT = 30000 * 60; // 30 mins public static final long DEFAULT_INDEX_REBUILD_CLIENT_SCANNER_TIMEOUT = 30000 * 60; // 30 mins - public static final int DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER = 5; - public static final long DEFAULT_INDEX_REBULD_RPC_RETRY_PAUSE = 3000; // 3 seconds + public static final int DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER = 1; // no retries at rpc level + public static final int DEFAULT_INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD = 30000 * 60; // 30 mins /** * HConstants#HIGH_QOS is the max we will see to a standard table. We go higher to differentiate
