Repository: phoenix
Updated Branches:
  refs/heads/4.x-HBase-1.1 d15ecd47a -> cd0eb1254


PHOENIX-4027 Mark index as disabled during partial rebuild after configurable amount of time


Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo
Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/cd0eb125
Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/cd0eb125
Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/cd0eb125

Branch: refs/heads/4.x-HBase-1.1
Commit: cd0eb1254f4551638471e1b6df6d6349dfb58d6d
Parents: d15ecd4
Author: Samarth Jain <[email protected]>
Authored: Fri Jul 14 17:57:04 2017 -0700
Committer: Samarth Jain <[email protected]>
Committed: Fri Jul 14 17:57:04 2017 -0700

----------------------------------------------------------------------
 .../phoenix/end2end/PhoenixRuntimeIT.java       |  6 --
 .../end2end/index/MutableIndexFailureIT.java    |  3 +
 .../coprocessor/MetaDataRegionObserver.java     | 60 +++++++++++---------
 .../org/apache/phoenix/query/QueryServices.java |  2 +
 .../phoenix/query/QueryServicesOptions.java     |  4 +-
 5 files changed, 41 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/phoenix/blob/cd0eb125/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixRuntimeIT.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixRuntimeIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixRuntimeIT.java
index 1109070..95ab906 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixRuntimeIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixRuntimeIT.java
@@ -187,9 +187,6 @@ public class PhoenixRuntimeIT extends 
ParallelStatsDisabledIT {
                 assertEquals(
                     
Long.toString(QueryServicesOptions.DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER),
                     
rebuildQueryServicesConfig.get(HConstants.HBASE_CLIENT_RETRIES_NUMBER));
-                assertEquals(
-                    
Long.toString(QueryServicesOptions.DEFAULT_INDEX_REBULD_RPC_RETRY_PAUSE),
-                    
rebuildQueryServicesConfig.get(HConstants.HBASE_CLIENT_PAUSE));
                 ConnectionQueryServices rebuildQueryServices = 
rebuildIndexConnection.getQueryServices();
                 HConnection rebuildIndexHConnection =
                         (HConnection) 
Whitebox.getInternalState(rebuildQueryServices,
@@ -212,9 +209,6 @@ public class PhoenixRuntimeIT extends 
ParallelStatsDisabledIT {
                 assertEquals(
                     
Long.toString(QueryServicesOptions.DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER),
                     
rebuildHConnectionConfig.get(HConstants.HBASE_CLIENT_RETRIES_NUMBER));
-                assertEquals(
-                    
Long.toString(QueryServicesOptions.DEFAULT_INDEX_REBULD_RPC_RETRY_PAUSE),
-                    
rebuildHConnectionConfig.get(HConstants.HBASE_CLIENT_PAUSE));
             }
         }
     }

http://git-wip-us.apache.org/repos/asf/phoenix/blob/cd0eb125/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/MutableIndexFailureIT.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/MutableIndexFailureIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/MutableIndexFailureIT.java
index ec0b06f..14b762e 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/MutableIndexFailureIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/MutableIndexFailureIT.java
@@ -129,6 +129,9 @@ public class MutableIndexFailureIT extends BaseTest {
         serverProps.put("hbase.balancer.period", 
String.valueOf(Integer.MAX_VALUE));
         serverProps.put(QueryServices.INDEX_FAILURE_HANDLING_REBUILD_ATTRIB, 
Boolean.TRUE.toString());
         
serverProps.put(QueryServices.INDEX_FAILURE_HANDLING_REBUILD_INTERVAL_ATTRIB, 
"4000");
+        
serverProps.put(QueryServices.INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD, 
"30000"); // give up rebuilding after 30 seconds
+        // need to override rpc retries otherwise test doesn't pass
+        serverProps.put(QueryServices.INDEX_REBUILD_RPC_RETRIES_COUNTER, 
Long.toString(1));
         Map<String, String> clientProps = 
Collections.singletonMap(QueryServices.TRANSACTIONS_ENABLED, 
Boolean.TRUE.toString());
         NUM_SLAVES_BASE = 4;
         setUpTestDriver(new ReadOnlyProps(serverProps.entrySet().iterator()), 
new ReadOnlyProps(clientProps.entrySet().iterator()));

http://git-wip-us.apache.org/repos/asf/phoenix/blob/cd0eb125/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataRegionObserver.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataRegionObserver.java b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataRegionObserver.java
index 4c63587..5cfacfc 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataRegionObserver.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataRegionObserver.java
@@ -76,8 +76,10 @@ import org.apache.phoenix.schema.MetaDataClient;
 import org.apache.phoenix.schema.PIndexState;
 import org.apache.phoenix.schema.PName;
 import org.apache.phoenix.schema.PTable;
+import org.apache.phoenix.schema.SortOrder;
 import org.apache.phoenix.schema.TableNotFoundException;
 import org.apache.phoenix.schema.TableRef;
+import org.apache.phoenix.schema.types.PChar;
 import org.apache.phoenix.schema.types.PLong;
 import org.apache.phoenix.util.ByteUtil;
 import org.apache.phoenix.util.MetaDataUtil;
@@ -105,7 +107,6 @@ public class MetaDataRegionObserver extends 
BaseRegionObserver {
     private boolean enableRebuildIndex = 
QueryServicesOptions.DEFAULT_INDEX_FAILURE_HANDLING_REBUILD;
     private long rebuildIndexTimeInterval = 
QueryServicesOptions.DEFAULT_INDEX_FAILURE_HANDLING_REBUILD_INTERVAL;
     private static Map<PName, Long> batchExecutedPerTableMap = new 
HashMap<PName, Long>();
-
     @GuardedBy("MetaDataRegionObserver.class")
     private static Properties rebuildIndexConnectionProps;
 
@@ -194,8 +195,7 @@ public class MetaDataRegionObserver extends 
BaseRegionObserver {
             
initRebuildIndexConnectionProps(e.getEnvironment().getConfiguration());
             // starts index rebuild schedule work
             BuildIndexScheduleTask task = new 
BuildIndexScheduleTask(e.getEnvironment());
-            // run scheduled task every 10 secs
-            executor.scheduleAtFixedRate(task, 10000, 
rebuildIndexTimeInterval, TimeUnit.MILLISECONDS);
+            executor.scheduleWithFixedDelay(task, 10000, 
rebuildIndexTimeInterval, TimeUnit.MILLISECONDS);
         } catch (ClassNotFoundException ex) {
             LOG.error("BuildIndexScheduleTask cannot start!", ex);
         }
@@ -212,6 +212,7 @@ public class MetaDataRegionObserver extends 
BaseRegionObserver {
         RegionCoprocessorEnvironment env;
         private long rebuildIndexBatchSize = HConstants.LATEST_TIMESTAMP;
         private long configuredBatches = 10;
+        private long indexDisableTimestampThreshold;
 
         public BuildIndexScheduleTask(RegionCoprocessorEnvironment env) {
             this.env = env;
@@ -220,6 +221,9 @@ public class MetaDataRegionObserver extends 
BaseRegionObserver {
                     QueryServices.INDEX_FAILURE_HANDLING_REBUILD_PERIOD, 
HConstants.LATEST_TIMESTAMP);
             this.configuredBatches = configuration.getLong(
                     
QueryServices.INDEX_FAILURE_HANDLING_REBUILD_NUMBER_OF_BATCHES_PER_TABLE, 
configuredBatches);
+            this.indexDisableTimestampThreshold =
+                    
configuration.getLong(QueryServices.INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD,
+                        
QueryServicesOptions.DEFAULT_INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD);
         }
 
         @Override
@@ -309,6 +313,32 @@ public class MetaDataRegionObserver extends 
BaseRegionObserver {
                                 + indexPTable.getName() + " are online.");
                         continue;
                     }
+                    long indexDisableTimestamp =
+                            
PLong.INSTANCE.getCodec().decodeLong(disabledTimeStamp, 0,
+                                SortOrder.ASC);
+                    PIndexState state = 
PIndexState.fromSerializedValue(indexState[0]);
+                    if (indexDisableTimestamp > 0 && System.currentTimeMillis()
+                            - indexDisableTimestamp > 
indexDisableTimestampThreshold) {
+                        /*
+                         * It has been too long since the index has been 
disabled and any future
+                         * attempts to reenable it likely will fail. So we are 
going to mark the
+                         * index as disabled and set the index disable 
timestamp to 0 so that the
+                         * rebuild task won't pick up this index again for 
rebuild.
+                         */
+                        try {
+                            updateIndexState(conn, indexTableFullName, env, 
state,
+                                PIndexState.DISABLE, 0l);
+                            LOG.error("Unable to rebuild index " + 
indexTableFullName
+                                    + ". Won't attempt again since index 
disable timestamp is older than current time by "
+                                    + indexDisableTimestampThreshold
+                                    + " milliseconds. Manual intervention 
needed to re-build the index");
+                        } catch (Throwable ex) {
+                            LOG.error(
+                                "Unable to mark index " + indexTableFullName + 
" as disabled.", ex);
+                        }
+                        continue; // don't attempt another rebuild 
irrespective of whether
+                                  // updateIndexState worked or not
+                    }
                     // Allow index to begin incremental maintenance as index 
is back online and we
                     // cannot transition directly from DISABLED -> ACTIVE
                     if 
(Bytes.compareTo(PIndexState.DISABLE.getSerializedBytes(), indexState) == 0) {
@@ -430,24 +460,7 @@ public class MetaDataRegionObserver extends 
BaseRegionObserver {
                                                                }
                                                        }
                                                } catch (Exception e) {
-                                                       for (PTable index : 
indexesToPartiallyRebuild) {
-                                                       String 
indexTableFullName = SchemaUtil.getTableName(
-                                    index.getSchemaName().getString(),
-                                    index.getTableName().getString());
-                                try {
-                                    /*
-                                     * We are going to mark the index as 
disabled and set the index
-                                     * disable timestamp to 0 so that the 
rebuild task won't pick up
-                                     * this index again for rebuild.
-                                     */
-                                    updateIndexState(conn, indexTableFullName, 
env,
-                                        PIndexState.INACTIVE, 
PIndexState.DISABLE, 0l);
-                                } catch (Throwable ex) {
-                                                           LOG.error("Unable 
to mark index " + indexTableFullName + " as disabled after rebuilding it 
failed", ex);
-                                                       }
-                                                   }
-                                                       LOG.error("Unable to 
rebuild " + dataPTable + " indexes " + indexesToPartiallyRebuild
-                                                                       + ". 
Won't attempt again. Manual intervention needed to re-build the index", e);
+                                                       LOG.error("Unable to 
rebuild " + dataPTable + " indexes " + indexesToPartiallyRebuild, e);
                                                }
                                        }
                                }
@@ -550,9 +563,6 @@ public class MetaDataRegionObserver extends 
BaseRegionObserver {
             int indexRebuildRpcRetriesCounter =
                     
config.getInt(QueryServices.INDEX_REBUILD_RPC_RETRIES_COUNTER,
                         
QueryServicesOptions.DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER);
-            long indexRebuildRpcRetryPauseTimeMs =
-                    
config.getLong(QueryServices.INDEX_REBUILD_RPC_RETRY_PAUSE_TIME,
-                        
QueryServicesOptions.DEFAULT_INDEX_REBULD_RPC_RETRY_PAUSE);
             // Set SCN so that we don't ping server and have the upper bound 
set back to
             // the timestamp when the failure occurred.
             props.setProperty(PhoenixRuntime.CURRENT_SCN_ATTRIB, 
Long.toString(Long.MAX_VALUE));
@@ -565,8 +575,6 @@ public class MetaDataRegionObserver extends 
BaseRegionObserver {
                 Long.toString(indexRebuildRPCTimeoutMs));
             props.setProperty(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
                 Long.toString(indexRebuildRpcRetriesCounter));
-            props.setProperty(HConstants.HBASE_CLIENT_PAUSE,
-                Long.toString(indexRebuildRpcRetryPauseTimeMs));
             // don't run a second index populations upsert select
             props.setProperty(QueryServices.INDEX_POPULATION_SLEEP_TIME, "0");
             rebuildIndexConnectionProps = 
PropertiesUtil.combineProperties(props, config);

http://git-wip-us.apache.org/repos/asf/phoenix/blob/cd0eb125/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java
index a0575d9..57aba16 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java
@@ -142,6 +142,8 @@ public interface QueryServices extends SQLCloseable {
         "phoenix.index.failure.handling.rebuild.interval";
     
     public static final String 
INDEX_FAILURE_HANDLING_REBUILD_NUMBER_OF_BATCHES_PER_TABLE = 
"phoenix.index.rebuild.batch.perTable";
+    // If index disable timestamp is older than this threshold, then index 
rebuild task won't attempt to rebuild it
+    public static final String INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD = 
"phoenix.index.rebuild.disabletimestamp.threshold";
 
     // Block writes to data table when index write fails
     public static final String INDEX_FAILURE_BLOCK_WRITE = 
"phoenix.index.failure.block.write";

http://git-wip-us.apache.org/repos/asf/phoenix/blob/cd0eb125/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
index e6be091..21d8300 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
@@ -185,8 +185,8 @@ public class QueryServicesOptions {
     public static final long DEFAULT_INDEX_REBUILD_QUERY_TIMEOUT = 30000 * 60; 
// 30 mins
     public static final long DEFAULT_INDEX_REBUILD_RPC_TIMEOUT = 30000 * 60; 
// 30 mins
     public static final long DEFAULT_INDEX_REBUILD_CLIENT_SCANNER_TIMEOUT = 
30000 * 60; // 30 mins
-    public static final int DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER = 5;
-    public static final long DEFAULT_INDEX_REBULD_RPC_RETRY_PAUSE = 3000; // 3 
seconds
+    public static final int DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER = 1; // 
no retries at rpc level
+    public static final int DEFAULT_INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD 
= 30000 * 60; // 30 mins
 
     /**
      * HConstants#HIGH_QOS is the max we will see to a standard table. We go 
higher to differentiate

Reply via email to