http://git-wip-us.apache.org/repos/asf/hbase-site/blob/3469cbc0/devapidocs/src-html/org/apache/hadoop/hbase/master/assignment/AssignmentManager.RegionInTransitionStat.html ---------------------------------------------------------------------- diff --git a/devapidocs/src-html/org/apache/hadoop/hbase/master/assignment/AssignmentManager.RegionInTransitionStat.html b/devapidocs/src-html/org/apache/hadoop/hbase/master/assignment/AssignmentManager.RegionInTransitionStat.html index fe1e077..90c31f4 100644 --- a/devapidocs/src-html/org/apache/hadoop/hbase/master/assignment/AssignmentManager.RegionInTransitionStat.html +++ b/devapidocs/src-html/org/apache/hadoop/hbase/master/assignment/AssignmentManager.RegionInTransitionStat.html @@ -1072,894 +1072,913 @@ <span class="sourceLineNo">1064</span><a name="line.1064"></a> <span class="sourceLineNo">1065</span> protected boolean waitServerReportEvent(final ServerName serverName, final Procedure proc) {<a name="line.1065"></a> <span class="sourceLineNo">1066</span> final ServerStateNode serverNode = regionStates.getOrCreateServer(serverName);<a name="line.1066"></a> -<span class="sourceLineNo">1067</span> return serverNode.getReportEvent().suspendIfNotReady(proc);<a name="line.1067"></a> -<span class="sourceLineNo">1068</span> }<a name="line.1068"></a> -<span class="sourceLineNo">1069</span><a name="line.1069"></a> -<span class="sourceLineNo">1070</span> protected void wakeServerReportEvent(final ServerStateNode serverNode) {<a name="line.1070"></a> -<span class="sourceLineNo">1071</span> serverNode.getReportEvent().wake(getProcedureScheduler());<a name="line.1071"></a> -<span class="sourceLineNo">1072</span> }<a name="line.1072"></a> -<span class="sourceLineNo">1073</span><a name="line.1073"></a> -<span class="sourceLineNo">1074</span> // ============================================================================================<a name="line.1074"></a> -<span class="sourceLineNo">1075</span> // RIT chore<a name="line.1075"></a> -<span class="sourceLineNo">1076</span> // ============================================================================================<a name="line.1076"></a> -<span class="sourceLineNo">1077</span> private static class RegionInTransitionChore extends ProcedureInMemoryChore<MasterProcedureEnv> {<a name="line.1077"></a> -<span class="sourceLineNo">1078</span> public RegionInTransitionChore(final int timeoutMsec) {<a name="line.1078"></a> -<span class="sourceLineNo">1079</span> super(timeoutMsec);<a name="line.1079"></a> -<span class="sourceLineNo">1080</span> }<a name="line.1080"></a> -<span class="sourceLineNo">1081</span><a name="line.1081"></a> -<span class="sourceLineNo">1082</span> @Override<a name="line.1082"></a> -<span class="sourceLineNo">1083</span> protected void periodicExecute(final MasterProcedureEnv env) {<a name="line.1083"></a> -<span class="sourceLineNo">1084</span> final AssignmentManager am = env.getAssignmentManager();<a name="line.1084"></a> -<span class="sourceLineNo">1085</span><a name="line.1085"></a> -<span class="sourceLineNo">1086</span> final RegionInTransitionStat ritStat = am.computeRegionInTransitionStat();<a name="line.1086"></a> -<span class="sourceLineNo">1087</span> if (ritStat.hasRegionsOverThreshold()) {<a name="line.1087"></a> -<span class="sourceLineNo">1088</span> for (RegionState hri: ritStat.getRegionOverThreshold()) {<a name="line.1088"></a> -<span class="sourceLineNo">1089</span> am.handleRegionOverStuckWarningThreshold(hri.getRegion());<a name="line.1089"></a> -<span class="sourceLineNo">1090</span> }<a name="line.1090"></a> -<span class="sourceLineNo">1091</span> }<a name="line.1091"></a> -<span class="sourceLineNo">1092</span><a name="line.1092"></a> -<span class="sourceLineNo">1093</span> // update metrics<a name="line.1093"></a> -<span class="sourceLineNo">1094</span> am.updateRegionsInTransitionMetrics(ritStat);<a name="line.1094"></a> -<span class="sourceLineNo">1095</span> }<a name="line.1095"></a> -<span class="sourceLineNo">1096</span> }<a name="line.1096"></a> -<span class="sourceLineNo">1097</span><a name="line.1097"></a> -<span class="sourceLineNo">1098</span> public RegionInTransitionStat computeRegionInTransitionStat() {<a name="line.1098"></a> -<span class="sourceLineNo">1099</span> final RegionInTransitionStat rit = new RegionInTransitionStat(getConfiguration());<a name="line.1099"></a> -<span class="sourceLineNo">1100</span> rit.update(this);<a name="line.1100"></a> -<span class="sourceLineNo">1101</span> return rit;<a name="line.1101"></a> -<span class="sourceLineNo">1102</span> }<a name="line.1102"></a> -<span class="sourceLineNo">1103</span><a name="line.1103"></a> -<span class="sourceLineNo">1104</span> public static class RegionInTransitionStat {<a name="line.1104"></a> -<span class="sourceLineNo">1105</span> private final int ritThreshold;<a name="line.1105"></a> +<span class="sourceLineNo">1067</span> if (serverNode == null) {<a name="line.1067"></a> +<span class="sourceLineNo">1068</span> LOG.warn("serverName=null; {}", proc);<a name="line.1068"></a> +<span class="sourceLineNo">1069</span> }<a name="line.1069"></a> +<span class="sourceLineNo">1070</span> return serverNode.getReportEvent().suspendIfNotReady(proc);<a name="line.1070"></a> +<span class="sourceLineNo">1071</span> }<a name="line.1071"></a> +<span class="sourceLineNo">1072</span><a name="line.1072"></a> +<span class="sourceLineNo">1073</span> protected void wakeServerReportEvent(final ServerStateNode serverNode) {<a name="line.1073"></a> +<span class="sourceLineNo">1074</span> serverNode.getReportEvent().wake(getProcedureScheduler());<a name="line.1074"></a> +<span class="sourceLineNo">1075</span> }<a name="line.1075"></a> +<span class="sourceLineNo">1076</span><a name="line.1076"></a> +<span class="sourceLineNo">1077</span> // ============================================================================================<a name="line.1077"></a> +<span class="sourceLineNo">1078</span> // RIT chore<a name="line.1078"></a> +<span class="sourceLineNo">1079</span> // ============================================================================================<a name="line.1079"></a> +<span class="sourceLineNo">1080</span> private static class RegionInTransitionChore extends ProcedureInMemoryChore<MasterProcedureEnv> {<a name="line.1080"></a> +<span class="sourceLineNo">1081</span> public RegionInTransitionChore(final int timeoutMsec) {<a name="line.1081"></a> +<span class="sourceLineNo">1082</span> super(timeoutMsec);<a name="line.1082"></a> +<span class="sourceLineNo">1083</span> }<a name="line.1083"></a> +<span class="sourceLineNo">1084</span><a name="line.1084"></a> +<span class="sourceLineNo">1085</span> @Override<a name="line.1085"></a> +<span class="sourceLineNo">1086</span> protected void periodicExecute(final MasterProcedureEnv env) {<a name="line.1086"></a> +<span class="sourceLineNo">1087</span> final AssignmentManager am = env.getAssignmentManager();<a name="line.1087"></a> +<span class="sourceLineNo">1088</span><a name="line.1088"></a> +<span class="sourceLineNo">1089</span> final RegionInTransitionStat ritStat = am.computeRegionInTransitionStat();<a name="line.1089"></a> +<span class="sourceLineNo">1090</span> if (ritStat.hasRegionsOverThreshold()) {<a name="line.1090"></a> +<span class="sourceLineNo">1091</span> for (RegionState hri: ritStat.getRegionOverThreshold()) {<a name="line.1091"></a> +<span class="sourceLineNo">1092</span> am.handleRegionOverStuckWarningThreshold(hri.getRegion());<a name="line.1092"></a> +<span class="sourceLineNo">1093</span> }<a name="line.1093"></a> +<span class="sourceLineNo">1094</span> }<a name="line.1094"></a> +<span class="sourceLineNo">1095</span><a name="line.1095"></a> +<span class="sourceLineNo">1096</span> // update metrics<a name="line.1096"></a> +<span class="sourceLineNo">1097</span> am.updateRegionsInTransitionMetrics(ritStat);<a name="line.1097"></a> +<span class="sourceLineNo">1098</span> }<a name="line.1098"></a> +<span class="sourceLineNo">1099</span> }<a name="line.1099"></a> +<span class="sourceLineNo">1100</span><a name="line.1100"></a> +<span class="sourceLineNo">1101</span> public RegionInTransitionStat computeRegionInTransitionStat() {<a name="line.1101"></a> +<span class="sourceLineNo">1102</span> final RegionInTransitionStat rit = new RegionInTransitionStat(getConfiguration());<a name="line.1102"></a> +<span class="sourceLineNo">1103</span> rit.update(this);<a name="line.1103"></a> +<span class="sourceLineNo">1104</span> return rit;<a name="line.1104"></a> +<span class="sourceLineNo">1105</span> }<a name="line.1105"></a> <span class="sourceLineNo">1106</span><a name="line.1106"></a> -<span class="sourceLineNo">1107</span> private HashMap<String, RegionState> ritsOverThreshold = null;<a name="line.1107"></a> -<span class="sourceLineNo">1108</span> private long statTimestamp;<a name="line.1108"></a> -<span class="sourceLineNo">1109</span> private long oldestRITTime = 0;<a name="line.1109"></a> -<span class="sourceLineNo">1110</span> private int totalRITsTwiceThreshold = 0;<a name="line.1110"></a> -<span class="sourceLineNo">1111</span> private int totalRITs = 0;<a name="line.1111"></a> -<span class="sourceLineNo">1112</span><a name="line.1112"></a> -<span class="sourceLineNo">1113</span> @VisibleForTesting<a name="line.1113"></a> -<span class="sourceLineNo">1114</span> public RegionInTransitionStat(final Configuration conf) {<a name="line.1114"></a> -<span class="sourceLineNo">1115</span> this.ritThreshold =<a name="line.1115"></a> -<span class="sourceLineNo">1116</span> conf.getInt(METRICS_RIT_STUCK_WARNING_THRESHOLD, DEFAULT_RIT_STUCK_WARNING_THRESHOLD);<a name="line.1116"></a> -<span class="sourceLineNo">1117</span> }<a name="line.1117"></a> -<span class="sourceLineNo">1118</span><a name="line.1118"></a> -<span class="sourceLineNo">1119</span> public int getRITThreshold() {<a name="line.1119"></a> -<span class="sourceLineNo">1120</span> return ritThreshold;<a name="line.1120"></a> -<span class="sourceLineNo">1121</span> }<a name="line.1121"></a> -<span class="sourceLineNo">1122</span><a name="line.1122"></a> -<span class="sourceLineNo">1123</span> public long getTimestamp() {<a name="line.1123"></a> -<span class="sourceLineNo">1124</span> return statTimestamp;<a name="line.1124"></a> -<span class="sourceLineNo">1125</span> }<a name="line.1125"></a> -<span class="sourceLineNo">1126</span><a name="line.1126"></a> -<span class="sourceLineNo">1127</span> public int getTotalRITs() {<a name="line.1127"></a> -<span class="sourceLineNo">1128</span> return totalRITs;<a name="line.1128"></a> -<span class="sourceLineNo">1129</span> }<a name="line.1129"></a> -<span class="sourceLineNo">1130</span><a name="line.1130"></a> -<span class="sourceLineNo">1131</span> public long getOldestRITTime() {<a name="line.1131"></a> -<span class="sourceLineNo">1132</span> return oldestRITTime;<a name="line.1132"></a> -<span class="sourceLineNo">1133</span> }<a name="line.1133"></a> -<span class="sourceLineNo">1134</span><a name="line.1134"></a> -<span class="sourceLineNo">1135</span> public int getTotalRITsOverThreshold() {<a name="line.1135"></a> -<span class="sourceLineNo">1136</span> Map<String, RegionState> m = this.ritsOverThreshold;<a name="line.1136"></a> -<span class="sourceLineNo">1137</span> return m != null ? m.size() : 0;<a name="line.1137"></a> -<span class="sourceLineNo">1138</span> }<a name="line.1138"></a> -<span class="sourceLineNo">1139</span><a name="line.1139"></a> -<span class="sourceLineNo">1140</span> public boolean hasRegionsTwiceOverThreshold() {<a name="line.1140"></a> -<span class="sourceLineNo">1141</span> return totalRITsTwiceThreshold > 0;<a name="line.1141"></a> -<span class="sourceLineNo">1142</span> }<a name="line.1142"></a> -<span class="sourceLineNo">1143</span><a name="line.1143"></a> -<span class="sourceLineNo">1144</span> public boolean hasRegionsOverThreshold() {<a name="line.1144"></a> -<span class="sourceLineNo">1145</span> Map<String, RegionState> m = this.ritsOverThreshold;<a name="line.1145"></a> -<span class="sourceLineNo">1146</span> return m != null && !m.isEmpty();<a name="line.1146"></a> -<span class="sourceLineNo">1147</span> }<a name="line.1147"></a> -<span class="sourceLineNo">1148</span><a name="line.1148"></a> -<span class="sourceLineNo">1149</span> public Collection<RegionState> getRegionOverThreshold() {<a name="line.1149"></a> -<span class="sourceLineNo">1150</span> Map<String, RegionState> m = this.ritsOverThreshold;<a name="line.1150"></a> -<span class="sourceLineNo">1151</span> return m != null? m.values(): Collections.EMPTY_SET;<a name="line.1151"></a> -<span class="sourceLineNo">1152</span> }<a name="line.1152"></a> -<span class="sourceLineNo">1153</span><a name="line.1153"></a> -<span class="sourceLineNo">1154</span> public boolean isRegionOverThreshold(final RegionInfo regionInfo) {<a name="line.1154"></a> -<span class="sourceLineNo">1155</span> Map<String, RegionState> m = this.ritsOverThreshold;<a name="line.1155"></a> -<span class="sourceLineNo">1156</span> return m != null && m.containsKey(regionInfo.getEncodedName());<a name="line.1156"></a> -<span class="sourceLineNo">1157</span> }<a name="line.1157"></a> -<span class="sourceLineNo">1158</span><a name="line.1158"></a> -<span class="sourceLineNo">1159</span> public boolean isRegionTwiceOverThreshold(final RegionInfo regionInfo) {<a name="line.1159"></a> -<span class="sourceLineNo">1160</span> Map<String, RegionState> m = this.ritsOverThreshold;<a name="line.1160"></a> -<span class="sourceLineNo">1161</span> if (m == null) return false;<a name="line.1161"></a> -<span class="sourceLineNo">1162</span> final RegionState state = m.get(regionInfo.getEncodedName());<a name="line.1162"></a> -<span class="sourceLineNo">1163</span> if (state == null) return false;<a name="line.1163"></a> -<span class="sourceLineNo">1164</span> return (statTimestamp - state.getStamp()) > (ritThreshold * 2);<a name="line.1164"></a> -<span class="sourceLineNo">1165</span> }<a name="line.1165"></a> -<span class="sourceLineNo">1166</span><a name="line.1166"></a> -<span class="sourceLineNo">1167</span> protected void update(final AssignmentManager am) {<a name="line.1167"></a> -<span class="sourceLineNo">1168</span> final RegionStates regionStates = am.getRegionStates();<a name="line.1168"></a> -<span class="sourceLineNo">1169</span> this.statTimestamp = EnvironmentEdgeManager.currentTime();<a name="line.1169"></a> -<span class="sourceLineNo">1170</span> update(regionStates.getRegionsStateInTransition(), statTimestamp);<a name="line.1170"></a> -<span class="sourceLineNo">1171</span> update(regionStates.getRegionFailedOpen(), statTimestamp);<a name="line.1171"></a> -<span class="sourceLineNo">1172</span> }<a name="line.1172"></a> -<span class="sourceLineNo">1173</span><a name="line.1173"></a> -<span class="sourceLineNo">1174</span> private void update(final Collection<RegionState> regions, final long currentTime) {<a name="line.1174"></a> -<span class="sourceLineNo">1175</span> for (RegionState state: regions) {<a name="line.1175"></a> -<span class="sourceLineNo">1176</span> totalRITs++;<a name="line.1176"></a> -<span class="sourceLineNo">1177</span> final long ritTime = currentTime - state.getStamp();<a name="line.1177"></a> -<span class="sourceLineNo">1178</span> if (ritTime > ritThreshold) {<a name="line.1178"></a> -<span class="sourceLineNo">1179</span> if (ritsOverThreshold == null) {<a name="line.1179"></a> -<span class="sourceLineNo">1180</span> ritsOverThreshold = new HashMap<String, RegionState>();<a name="line.1180"></a> -<span class="sourceLineNo">1181</span> }<a name="line.1181"></a> -<span class="sourceLineNo">1182</span> ritsOverThreshold.put(state.getRegion().getEncodedName(), state);<a name="line.1182"></a> -<span class="sourceLineNo">1183</span> totalRITsTwiceThreshold += (ritTime > (ritThreshold * 2)) ? 1 : 0;<a name="line.1183"></a> -<span class="sourceLineNo">1184</span> }<a name="line.1184"></a> -<span class="sourceLineNo">1185</span> if (oldestRITTime < ritTime) {<a name="line.1185"></a> -<span class="sourceLineNo">1186</span> oldestRITTime = ritTime;<a name="line.1186"></a> +<span class="sourceLineNo">1107</span> public static class RegionInTransitionStat {<a name="line.1107"></a> +<span class="sourceLineNo">1108</span> private final int ritThreshold;<a name="line.1108"></a> +<span class="sourceLineNo">1109</span><a name="line.1109"></a> +<span class="sourceLineNo">1110</span> private HashMap<String, RegionState> ritsOverThreshold = null;<a name="line.1110"></a> +<span class="sourceLineNo">1111</span> private long statTimestamp;<a name="line.1111"></a> +<span class="sourceLineNo">1112</span> private long oldestRITTime = 0;<a name="line.1112"></a> +<span class="sourceLineNo">1113</span> private int totalRITsTwiceThreshold = 0;<a name="line.1113"></a> +<span class="sourceLineNo">1114</span> private int totalRITs = 0;<a name="line.1114"></a> +<span class="sourceLineNo">1115</span><a name="line.1115"></a> +<span class="sourceLineNo">1116</span> @VisibleForTesting<a name="line.1116"></a> +<span class="sourceLineNo">1117</span> public RegionInTransitionStat(final Configuration conf) {<a name="line.1117"></a> +<span class="sourceLineNo">1118</span> this.ritThreshold =<a name="line.1118"></a> +<span class="sourceLineNo">1119</span> conf.getInt(METRICS_RIT_STUCK_WARNING_THRESHOLD, DEFAULT_RIT_STUCK_WARNING_THRESHOLD);<a name="line.1119"></a> +<span class="sourceLineNo">1120</span> }<a name="line.1120"></a> +<span class="sourceLineNo">1121</span><a name="line.1121"></a> +<span class="sourceLineNo">1122</span> public int getRITThreshold() {<a name="line.1122"></a> +<span class="sourceLineNo">1123</span> return ritThreshold;<a name="line.1123"></a> +<span class="sourceLineNo">1124</span> }<a name="line.1124"></a> +<span class="sourceLineNo">1125</span><a name="line.1125"></a> +<span class="sourceLineNo">1126</span> public long getTimestamp() {<a name="line.1126"></a> +<span class="sourceLineNo">1127</span> return statTimestamp;<a name="line.1127"></a> +<span class="sourceLineNo">1128</span> }<a name="line.1128"></a> +<span class="sourceLineNo">1129</span><a name="line.1129"></a> +<span class="sourceLineNo">1130</span> public int getTotalRITs() {<a name="line.1130"></a> +<span class="sourceLineNo">1131</span> return totalRITs;<a name="line.1131"></a> +<span class="sourceLineNo">1132</span> }<a name="line.1132"></a> +<span class="sourceLineNo">1133</span><a name="line.1133"></a> +<span class="sourceLineNo">1134</span> public long getOldestRITTime() {<a name="line.1134"></a> +<span class="sourceLineNo">1135</span> return oldestRITTime;<a name="line.1135"></a> +<span class="sourceLineNo">1136</span> }<a name="line.1136"></a> +<span class="sourceLineNo">1137</span><a name="line.1137"></a> +<span class="sourceLineNo">1138</span> public int getTotalRITsOverThreshold() {<a name="line.1138"></a> +<span class="sourceLineNo">1139</span> Map<String, RegionState> m = this.ritsOverThreshold;<a name="line.1139"></a> +<span class="sourceLineNo">1140</span> return m != null ? m.size() : 0;<a name="line.1140"></a> +<span class="sourceLineNo">1141</span> }<a name="line.1141"></a> +<span class="sourceLineNo">1142</span><a name="line.1142"></a> +<span class="sourceLineNo">1143</span> public boolean hasRegionsTwiceOverThreshold() {<a name="line.1143"></a> +<span class="sourceLineNo">1144</span> return totalRITsTwiceThreshold > 0;<a name="line.1144"></a> +<span class="sourceLineNo">1145</span> }<a name="line.1145"></a> +<span class="sourceLineNo">1146</span><a name="line.1146"></a> +<span class="sourceLineNo">1147</span> public boolean hasRegionsOverThreshold() {<a name="line.1147"></a> +<span class="sourceLineNo">1148</span> Map<String, RegionState> m = this.ritsOverThreshold;<a name="line.1148"></a> +<span class="sourceLineNo">1149</span> return m != null && !m.isEmpty();<a name="line.1149"></a> +<span class="sourceLineNo">1150</span> }<a name="line.1150"></a> +<span class="sourceLineNo">1151</span><a name="line.1151"></a> +<span class="sourceLineNo">1152</span> public Collection<RegionState> getRegionOverThreshold() {<a name="line.1152"></a> +<span class="sourceLineNo">1153</span> Map<String, RegionState> m = this.ritsOverThreshold;<a name="line.1153"></a> +<span class="sourceLineNo">1154</span> return m != null? m.values(): Collections.EMPTY_SET;<a name="line.1154"></a> +<span class="sourceLineNo">1155</span> }<a name="line.1155"></a> +<span class="sourceLineNo">1156</span><a name="line.1156"></a> +<span class="sourceLineNo">1157</span> public boolean isRegionOverThreshold(final RegionInfo regionInfo) {<a name="line.1157"></a> +<span class="sourceLineNo">1158</span> Map<String, RegionState> m = this.ritsOverThreshold;<a name="line.1158"></a> +<span class="sourceLineNo">1159</span> return m != null && m.containsKey(regionInfo.getEncodedName());<a name="line.1159"></a> +<span class="sourceLineNo">1160</span> }<a name="line.1160"></a> +<span class="sourceLineNo">1161</span><a name="line.1161"></a> +<span class="sourceLineNo">1162</span> public boolean isRegionTwiceOverThreshold(final RegionInfo regionInfo) {<a name="line.1162"></a> +<span class="sourceLineNo">1163</span> Map<String, RegionState> m = this.ritsOverThreshold;<a name="line.1163"></a> +<span class="sourceLineNo">1164</span> if (m == null) return false;<a name="line.1164"></a> +<span class="sourceLineNo">1165</span> final RegionState state = m.get(regionInfo.getEncodedName());<a name="line.1165"></a> +<span class="sourceLineNo">1166</span> if (state == null) return false;<a name="line.1166"></a> +<span class="sourceLineNo">1167</span> return (statTimestamp - state.getStamp()) > (ritThreshold * 2);<a name="line.1167"></a> +<span class="sourceLineNo">1168</span> }<a name="line.1168"></a> +<span class="sourceLineNo">1169</span><a name="line.1169"></a> +<span class="sourceLineNo">1170</span> protected void update(final AssignmentManager am) {<a name="line.1170"></a> +<span class="sourceLineNo">1171</span> final RegionStates regionStates = am.getRegionStates();<a name="line.1171"></a> +<span class="sourceLineNo">1172</span> this.statTimestamp = EnvironmentEdgeManager.currentTime();<a name="line.1172"></a> +<span class="sourceLineNo">1173</span> update(regionStates.getRegionsStateInTransition(), statTimestamp);<a name="line.1173"></a> +<span class="sourceLineNo">1174</span> update(regionStates.getRegionFailedOpen(), statTimestamp);<a name="line.1174"></a> +<span class="sourceLineNo">1175</span> }<a name="line.1175"></a> +<span class="sourceLineNo">1176</span><a name="line.1176"></a> +<span class="sourceLineNo">1177</span> private void update(final Collection<RegionState> regions, final long currentTime) {<a name="line.1177"></a> +<span class="sourceLineNo">1178</span> for (RegionState state: regions) {<a name="line.1178"></a> +<span class="sourceLineNo">1179</span> totalRITs++;<a name="line.1179"></a> +<span class="sourceLineNo">1180</span> final long ritTime = currentTime - state.getStamp();<a name="line.1180"></a> +<span class="sourceLineNo">1181</span> if (ritTime > ritThreshold) {<a name="line.1181"></a> +<span class="sourceLineNo">1182</span> if (ritsOverThreshold == null) {<a name="line.1182"></a> +<span class="sourceLineNo">1183</span> ritsOverThreshold = new HashMap<String, RegionState>();<a name="line.1183"></a> +<span class="sourceLineNo">1184</span> }<a name="line.1184"></a> +<span class="sourceLineNo">1185</span> ritsOverThreshold.put(state.getRegion().getEncodedName(), state);<a name="line.1185"></a> +<span class="sourceLineNo">1186</span> totalRITsTwiceThreshold += (ritTime > (ritThreshold * 2)) ? 1 : 0;<a name="line.1186"></a> <span class="sourceLineNo">1187</span> }<a name="line.1187"></a> -<span class="sourceLineNo">1188</span> }<a name="line.1188"></a> -<span class="sourceLineNo">1189</span> }<a name="line.1189"></a> -<span class="sourceLineNo">1190</span> }<a name="line.1190"></a> -<span class="sourceLineNo">1191</span><a name="line.1191"></a> -<span class="sourceLineNo">1192</span> private void updateRegionsInTransitionMetrics(final RegionInTransitionStat ritStat) {<a name="line.1192"></a> -<span class="sourceLineNo">1193</span> metrics.updateRITOldestAge(ritStat.getOldestRITTime());<a name="line.1193"></a> -<span class="sourceLineNo">1194</span> metrics.updateRITCount(ritStat.getTotalRITs());<a name="line.1194"></a> -<span class="sourceLineNo">1195</span> metrics.updateRITCountOverThreshold(ritStat.getTotalRITsOverThreshold());<a name="line.1195"></a> -<span class="sourceLineNo">1196</span> }<a name="line.1196"></a> -<span class="sourceLineNo">1197</span><a name="line.1197"></a> -<span class="sourceLineNo">1198</span> private void handleRegionOverStuckWarningThreshold(final RegionInfo regionInfo) {<a name="line.1198"></a> -<span class="sourceLineNo">1199</span> final RegionStateNode regionNode = regionStates.getRegionStateNode(regionInfo);<a name="line.1199"></a> -<span class="sourceLineNo">1200</span> //if (regionNode.isStuck()) {<a name="line.1200"></a> -<span class="sourceLineNo">1201</span> LOG.warn("STUCK Region-In-Transition {}", regionNode);<a name="line.1201"></a> -<span class="sourceLineNo">1202</span> }<a name="line.1202"></a> -<span class="sourceLineNo">1203</span><a name="line.1203"></a> -<span class="sourceLineNo">1204</span> // ============================================================================================<a name="line.1204"></a> -<span class="sourceLineNo">1205</span> // TODO: Master load/bootstrap<a name="line.1205"></a> -<span class="sourceLineNo">1206</span> // ============================================================================================<a name="line.1206"></a> -<span class="sourceLineNo">1207</span> public void joinCluster() throws IOException {<a name="line.1207"></a> -<span class="sourceLineNo">1208</span> final long startTime = System.currentTimeMillis();<a name="line.1208"></a> -<span class="sourceLineNo">1209</span> LOG.debug("Joining cluster...");<a name="line.1209"></a> -<span class="sourceLineNo">1210</span><a name="line.1210"></a> -<span class="sourceLineNo">1211</span> // Scan hbase:meta to build list of existing regions, servers, and assignment<a name="line.1211"></a> -<span class="sourceLineNo">1212</span> // hbase:meta is online when we get to here and TableStateManager has been started.<a name="line.1212"></a> -<span class="sourceLineNo">1213</span> loadMeta();<a name="line.1213"></a> -<span class="sourceLineNo">1214</span><a name="line.1214"></a> -<span class="sourceLineNo">1215</span> for (int i = 0; master.getServerManager().countOfRegionServers() < 1; ++i) {<a name="line.1215"></a> -<span class="sourceLineNo">1216</span> LOG.info("Waiting for RegionServers to join; current count=" +<a name="line.1216"></a> -<span class="sourceLineNo">1217</span> master.getServerManager().countOfRegionServers());<a name="line.1217"></a> -<span class="sourceLineNo">1218</span> Threads.sleep(250);<a name="line.1218"></a> -<span class="sourceLineNo">1219</span> }<a name="line.1219"></a> -<span class="sourceLineNo">1220</span> LOG.info("Number of RegionServers=" + master.getServerManager().countOfRegionServers());<a name="line.1220"></a> -<span class="sourceLineNo">1221</span><a name="line.1221"></a> -<span class="sourceLineNo">1222</span> boolean failover = processofflineServersWithOnlineRegions();<a name="line.1222"></a> -<span class="sourceLineNo">1223</span><a name="line.1223"></a> -<span class="sourceLineNo">1224</span> // Start the RIT chore<a name="line.1224"></a> -<span class="sourceLineNo">1225</span> master.getMasterProcedureExecutor().addChore(this.ritChore);<a name="line.1225"></a> +<span class="sourceLineNo">1188</span> if (oldestRITTime < ritTime) {<a name="line.1188"></a> +<span class="sourceLineNo">1189</span> oldestRITTime = ritTime;<a name="line.1189"></a> +<span class="sourceLineNo">1190</span> }<a name="line.1190"></a> +<span class="sourceLineNo">1191</span> }<a name="line.1191"></a> +<span class="sourceLineNo">1192</span> }<a name="line.1192"></a> +<span class="sourceLineNo">1193</span> }<a name="line.1193"></a> +<span class="sourceLineNo">1194</span><a name="line.1194"></a> +<span class="sourceLineNo">1195</span> private void updateRegionsInTransitionMetrics(final RegionInTransitionStat ritStat) {<a name="line.1195"></a> +<span class="sourceLineNo">1196</span> metrics.updateRITOldestAge(ritStat.getOldestRITTime());<a name="line.1196"></a> +<span class="sourceLineNo">1197</span> metrics.updateRITCount(ritStat.getTotalRITs());<a name="line.1197"></a> +<span class="sourceLineNo">1198</span> metrics.updateRITCountOverThreshold(ritStat.getTotalRITsOverThreshold());<a name="line.1198"></a> +<span class="sourceLineNo">1199</span> }<a name="line.1199"></a> +<span class="sourceLineNo">1200</span><a name="line.1200"></a> +<span class="sourceLineNo">1201</span> private void handleRegionOverStuckWarningThreshold(final RegionInfo regionInfo) {<a name="line.1201"></a> +<span class="sourceLineNo">1202</span> final RegionStateNode regionNode = regionStates.getRegionStateNode(regionInfo);<a name="line.1202"></a> +<span class="sourceLineNo">1203</span> //if (regionNode.isStuck()) {<a name="line.1203"></a> +<span class="sourceLineNo">1204</span> LOG.warn("STUCK Region-In-Transition {}", regionNode);<a name="line.1204"></a> +<span class="sourceLineNo">1205</span> }<a name="line.1205"></a> +<span class="sourceLineNo">1206</span><a name="line.1206"></a> +<span class="sourceLineNo">1207</span> // ============================================================================================<a name="line.1207"></a> +<span class="sourceLineNo">1208</span> // TODO: Master load/bootstrap<a name="line.1208"></a> +<span class="sourceLineNo">1209</span> // ============================================================================================<a name="line.1209"></a> +<span class="sourceLineNo">1210</span> public void joinCluster() throws IOException {<a name="line.1210"></a> +<span class="sourceLineNo">1211</span> final long startTime = System.currentTimeMillis();<a name="line.1211"></a> +<span class="sourceLineNo">1212</span> LOG.debug("Joining cluster...");<a name="line.1212"></a> +<span class="sourceLineNo">1213</span><a name="line.1213"></a> +<span class="sourceLineNo">1214</span> // Scan hbase:meta to build list of existing regions, servers, and assignment<a name="line.1214"></a> +<span class="sourceLineNo">1215</span> // hbase:meta is online when we get to here and TableStateManager has been started.<a name="line.1215"></a> +<span class="sourceLineNo">1216</span> loadMeta();<a name="line.1216"></a> +<span class="sourceLineNo">1217</span><a name="line.1217"></a> +<span class="sourceLineNo">1218</span> for (int i = 0; master.getServerManager().countOfRegionServers() < 1; ++i) {<a name="line.1218"></a> +<span class="sourceLineNo">1219</span> LOG.info("Waiting for RegionServers to join; current count=" +<a name="line.1219"></a> +<span class="sourceLineNo">1220</span> master.getServerManager().countOfRegionServers());<a name="line.1220"></a> +<span class="sourceLineNo">1221</span> Threads.sleep(250);<a name="line.1221"></a> +<span class="sourceLineNo">1222</span> }<a name="line.1222"></a> +<span class="sourceLineNo">1223</span> LOG.info("Number of RegionServers=" + master.getServerManager().countOfRegionServers());<a name="line.1223"></a> +<span class="sourceLineNo">1224</span><a name="line.1224"></a> +<span class="sourceLineNo">1225</span> boolean failover = processofflineServersWithOnlineRegions();<a name="line.1225"></a> <span class="sourceLineNo">1226</span><a name="line.1226"></a> -<span class="sourceLineNo">1227</span> LOG.info(String.format("Joined the cluster in %s, failover=%s",<a name="line.1227"></a> -<span class="sourceLineNo">1228</span> StringUtils.humanTimeDiff(System.currentTimeMillis() - startTime), failover));<a name="line.1228"></a> -<span class="sourceLineNo">1229</span> }<a name="line.1229"></a> -<span class="sourceLineNo">1230</span><a name="line.1230"></a> -<span class="sourceLineNo">1231</span> private void loadMeta() throws IOException {<a name="line.1231"></a> -<span class="sourceLineNo">1232</span> // TODO: use a thread pool<a name="line.1232"></a> -<span class="sourceLineNo">1233</span> regionStateStore.visitMeta(new RegionStateStore.RegionStateVisitor() {<a name="line.1233"></a> -<span class="sourceLineNo">1234</span> @Override<a name="line.1234"></a> -<span class="sourceLineNo">1235</span> public void visitRegionState(final RegionInfo regionInfo, final State state,<a name="line.1235"></a> -<span class="sourceLineNo">1236</span> final ServerName regionLocation, final ServerName lastHost, final long openSeqNum) {<a name="line.1236"></a> -<span class="sourceLineNo">1237</span> final RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo);<a name="line.1237"></a> -<span class="sourceLineNo">1238</span> State localState = state;<a name="line.1238"></a> -<span class="sourceLineNo">1239</span> if (localState == null) {<a name="line.1239"></a> -<span class="sourceLineNo">1240</span> // No region state column data in hbase:meta table! Are I doing a rolling upgrade from<a name="line.1240"></a> -<span class="sourceLineNo">1241</span> // hbase1 to hbase2? Am I restoring a SNAPSHOT or otherwise adding a region to hbase:meta?<a name="line.1241"></a> -<span class="sourceLineNo">1242</span> // In any of these cases, state is empty. For now, presume OFFLINE but there are probably<a name="line.1242"></a> -<span class="sourceLineNo">1243</span> // cases where we need to probe more to be sure this correct; TODO informed by experience.<a name="line.1243"></a> -<span class="sourceLineNo">1244</span> LOG.info(regionInfo.getEncodedName() + " regionState=null; presuming " + State.OFFLINE);<a name="line.1244"></a> -<span class="sourceLineNo">1245</span> localState = State.OFFLINE;<a name="line.1245"></a> -<span class="sourceLineNo">1246</span> }<a name="line.1246"></a> -<span class="sourceLineNo">1247</span> synchronized (regionNode) {<a name="line.1247"></a> -<span class="sourceLineNo">1248</span> if (!regionNode.isInTransition()) {<a name="line.1248"></a> -<span class="sourceLineNo">1249</span> regionNode.setState(localState);<a name="line.1249"></a> -<span class="sourceLineNo">1250</span> regionNode.setLastHost(lastHost);<a name="line.1250"></a> -<span class="sourceLineNo">1251</span> regionNode.setRegionLocation(regionLocation);<a name="line.1251"></a> -<span class="sourceLineNo">1252</span> regionNode.setOpenSeqNum(openSeqNum);<a name="line.1252"></a> -<span class="sourceLineNo">1253</span><a name="line.1253"></a> -<span class="sourceLineNo">1254</span> if (localState == State.OPEN) {<a name="line.1254"></a> -<span class="sourceLineNo">1255</span> assert regionLocation != null : "found null region location for " + regionNode;<a name="line.1255"></a> -<span class="sourceLineNo">1256</span> regionStates.addRegionToServer(regionNode);<a name="line.1256"></a> -<span class="sourceLineNo">1257</span> } else if (localState == State.OFFLINE || regionInfo.isOffline()) {<a name="line.1257"></a> -<span class="sourceLineNo">1258</span> regionStates.addToOfflineRegions(regionNode);<a name="line.1258"></a> -<span class="sourceLineNo">1259</span> } else if (localState == State.CLOSED && getTableStateManager().<a name="line.1259"></a> -<span class="sourceLineNo">1260</span> isTableState(regionNode.getTable(), TableState.State.DISABLED,<a name="line.1260"></a> -<span class="sourceLineNo">1261</span> TableState.State.DISABLING)) {<a name="line.1261"></a> -<span class="sourceLineNo">1262</span> // The region is CLOSED and the table is DISABLED/ DISABLING, there is nothing to<a name="line.1262"></a> -<span class="sourceLineNo">1263</span> // schedule; the region is inert.<a name="line.1263"></a> -<span class="sourceLineNo">1264</span> } else {<a name="line.1264"></a> -<span class="sourceLineNo">1265</span> // These regions should have a procedure in replay<a name="line.1265"></a> -<span class="sourceLineNo">1266</span> regionStates.addRegionInTransition(regionNode, null);<a name="line.1266"></a> -<span class="sourceLineNo">1267</span> }<a name="line.1267"></a> -<span class="sourceLineNo">1268</span> }<a name="line.1268"></a> -<span class="sourceLineNo">1269</span> }<a name="line.1269"></a> -<span class="sourceLineNo">1270</span> }<a name="line.1270"></a> -<span class="sourceLineNo">1271</span> });<a name="line.1271"></a> -<span class="sourceLineNo">1272</span><a name="line.1272"></a> -<span class="sourceLineNo">1273</span> // every assignment is blocked until meta is loaded.<a name="line.1273"></a> -<span class="sourceLineNo">1274</span> wakeMetaLoadedEvent();<a name="line.1274"></a> -<span class="sourceLineNo">1275</span> }<a name="line.1275"></a> -<span class="sourceLineNo">1276</span><a name="line.1276"></a> -<span class="sourceLineNo">1277</span> /**<a name="line.1277"></a> -<span class="sourceLineNo">1278</span> * Look at what is in meta and the list of servers that have checked in and make reconciliation.<a name="line.1278"></a> -<span class="sourceLineNo">1279</span> * We cannot tell definitively the difference between a clean shutdown and a cluster that has<a name="line.1279"></a> -<span class="sourceLineNo">1280</span> * been crashed down. At this stage of a Master startup, they look the same: they have the<a name="line.1280"></a> -<span class="sourceLineNo">1281</span> * same state in hbase:meta. We could do detective work probing ZK and the FS for old WALs to<a name="line.1281"></a> -<span class="sourceLineNo">1282</span> * split but SCP does this already so just let it do its job.<a name="line.1282"></a> -<span class="sourceLineNo">1283</span> * <p>>The profiles of clean shutdown and cluster crash-down are the same because on clean<a name="line.1283"></a> -<span class="sourceLineNo">1284</span> * shutdown currently, we do not update hbase:meta with region close state (In AMv2, region<a name="line.1284"></a> -<span class="sourceLineNo">1285</span> * state is kept in hbse:meta). Usually the master runs all region transitions as of AMv2 but on<a name="line.1285"></a> -<span class="sourceLineNo">1286</span> * cluster controlled shutdown, the RegionServers close all their regions only reporting the<a name="line.1286"></a> -<span class="sourceLineNo">1287</span> * final change to the Master. Currently this report is ignored. Later we could take it and<a name="line.1287"></a> -<span class="sourceLineNo">1288</span> * update as many regions as we can before hbase:meta goes down or have the master run the<a name="line.1288"></a> -<span class="sourceLineNo">1289</span> * close of all regions out on the cluster but we may never be able to achieve the proper state on<a name="line.1289"></a> -<span class="sourceLineNo">1290</span> * all regions (at least not w/o lots of painful manipulations and waiting) so clean shutdown<a name="line.1290"></a> -<span class="sourceLineNo">1291</span> * might not be possible especially on big clusters.... And clean shutdown will take time. Given<a name="line.1291"></a> -<span class="sourceLineNo">1292</span> * this current state of affairs, we just run ServerCrashProcedure in both cases. It will always<a name="line.1292"></a> -<span class="sourceLineNo">1293</span> * do the right thing.<a name="line.1293"></a> -<span class="sourceLineNo">1294</span> * @return True if for sure this is a failover where a Master is starting up into an already<a name="line.1294"></a> -<span class="sourceLineNo">1295</span> * running cluster.<a name="line.1295"></a> -<span class="sourceLineNo">1296</span> */<a name="line.1296"></a> -<span class="sourceLineNo">1297</span> // The assumption here is that if RSs are crashing while we are executing this<a name="line.1297"></a> -<span class="sourceLineNo">1298</span> // they will be handled by the SSH that are put in the ServerManager deadservers "queue".<a name="line.1298"></a> -<span class="sourceLineNo">1299</span> private boolean processofflineServersWithOnlineRegions() {<a name="line.1299"></a> -<span class="sourceLineNo">1300</span> boolean deadServers = !master.getServerManager().getDeadServers().isEmpty();<a name="line.1300"></a> -<span class="sourceLineNo">1301</span> final Set<ServerName> offlineServersWithOnlineRegions = new HashSet<>();<a name="line.1301"></a> -<span class="sourceLineNo">1302</span> int size = regionStates.getRegionStateNodes().size();<a name="line.1302"></a> -<span class="sourceLineNo">1303</span> final List<RegionInfo> offlineRegionsToAssign = new ArrayList<>(size);<a name="line.1303"></a> -<span class="sourceLineNo">1304</span> // If deadservers then its a failover, else, we are not sure yet.<a name="line.1304"></a> -<span class="sourceLineNo">1305</span> boolean failover = deadServers;<a name="line.1305"></a> -<span class="sourceLineNo">1306</span> for (RegionStateNode regionNode: regionStates.getRegionStateNodes()) {<a name="line.1306"></a> -<span class="sourceLineNo">1307</span> // Region State can be OPEN even if we did controlled cluster shutdown; Master does not close<a name="line.1307"></a> -<span class="sourceLineNo">1308</span> // the regions in this case. The RegionServer does the close so hbase:meta is state in<a name="line.1308"></a> -<span class="sourceLineNo">1309</span> // hbase:meta is not updated -- Master does all updates -- and is left with OPEN as region<a name="line.1309"></a> -<span class="sourceLineNo">1310</span> // state in meta. How to tell difference between ordered shutdown and crashed-down cluster<a name="line.1310"></a> -<span class="sourceLineNo">1311</span> // then? We can't. Not currently. Perhaps if we updated hbase:meta with CLOSED on ordered<a name="line.1311"></a> -<span class="sourceLineNo">1312</span> // shutdown. This would slow shutdown though and not all edits would make it in anyways.<a name="line.1312"></a> -<span class="sourceLineNo">1313</span> // TODO: Examine.<a name="line.1313"></a> -<span class="sourceLineNo">1314</span> // Because we can't be sure it an ordered shutdown, we run ServerCrashProcedure always.<a name="line.1314"></a> -<span class="sourceLineNo">1315</span> // ServerCrashProcedure will try to retain old deploy when it goes to assign.<a name="line.1315"></a> -<span class="sourceLineNo">1316</span> if (regionNode.getState() == State.OPEN) {<a name="line.1316"></a> -<span class="sourceLineNo">1317</span> final ServerName serverName = regionNode.getRegionLocation();<a name="line.1317"></a> -<span class="sourceLineNo">1318</span> if (!master.getServerManager().isServerOnline(serverName)) {<a name="line.1318"></a> -<span class="sourceLineNo">1319</span> offlineServersWithOnlineRegions.add(serverName);<a name="line.1319"></a> -<span class="sourceLineNo">1320</span> } else {<a name="line.1320"></a> -<span class="sourceLineNo">1321</span> // Server is online. This a failover. Master is starting into already-running cluster.<a name="line.1321"></a> -<span class="sourceLineNo">1322</span> failover = true;<a name="line.1322"></a> -<span class="sourceLineNo">1323</span> }<a name="line.1323"></a> -<span class="sourceLineNo">1324</span> } else if (regionNode.getState() == State.OFFLINE) {<a name="line.1324"></a> -<span class="sourceLineNo">1325</span> if (isTableEnabled(regionNode.getTable())) {<a name="line.1325"></a> -<span class="sourceLineNo">1326</span> offlineRegionsToAssign.add(regionNode.getRegionInfo());<a name="line.1326"></a> -<span class="sourceLineNo">1327</span> }<a name="line.1327"></a> -<span class="sourceLineNo">1328</span> }<a name="line.1328"></a> -<span class="sourceLineNo">1329</span> }<a name="line.1329"></a> -<span class="sourceLineNo">1330</span> // Kill servers with online regions just-in-case. Runs ServerCrashProcedure.<a name="line.1330"></a> -<span class="sourceLineNo">1331</span> for (ServerName serverName: offlineServersWithOnlineRegions) {<a name="line.1331"></a> -<span class="sourceLineNo">1332</span> if (!master.getServerManager().isServerOnline(serverName)) {<a name="line.1332"></a> -<span class="sourceLineNo">1333</span> LOG.info("KILL RegionServer=" + serverName + " hosting regions but not online.");<a name="line.1333"></a> -<span class="sourceLineNo">1334</span> killRegionServer(serverName);<a name="line.1334"></a> -<span class="sourceLineNo">1335</span> }<a name="line.1335"></a> -<span class="sourceLineNo">1336</span> }<a name="line.1336"></a> -<span class="sourceLineNo">1337</span> setFailoverCleanupDone(true);<a name="line.1337"></a> -<span class="sourceLineNo">1338</span><a name="line.1338"></a> -<span class="sourceLineNo">1339</span> // Assign offline regions. Uses round-robin.<a name="line.1339"></a> -<span class="sourceLineNo">1340</span> if (offlineRegionsToAssign.size() > 0) {<a name="line.1340"></a> -<span class="sourceLineNo">1341</span> master.getMasterProcedureExecutor().submitProcedures(master.getAssignmentManager().<a name="line.1341"></a> -<span class="sourceLineNo">1342</span> createRoundRobinAssignProcedures(offlineRegionsToAssign));<a name="line.1342"></a> -<span class="sourceLineNo">1343</span> }<a name="line.1343"></a> -<span class="sourceLineNo">1344</span><a name="line.1344"></a> -<span class="sourceLineNo">1345</span> return failover;<a name="line.1345"></a> -<span class="sourceLineNo">1346</span> }<a name="line.1346"></a> +<span class="sourceLineNo">1227</span> // Start the RIT chore<a name="line.1227"></a> +<span class="sourceLineNo">1228</span> master.getMasterProcedureExecutor().addChore(this.ritChore);<a name="line.1228"></a> +<span class="sourceLineNo">1229</span><a name="line.1229"></a> +<span class="sourceLineNo">1230</span> LOG.info(String.format("Joined the cluster in %s, failover=%s",<a name="line.1230"></a> +<span class="sourceLineNo">1231</span> StringUtils.humanTimeDiff(System.currentTimeMillis() - startTime), failover));<a name="line.1231"></a> +<span class="sourceLineNo">1232</span> }<a name="line.1232"></a> +<span class="sourceLineNo">1233</span><a name="line.1233"></a> +<span class="sourceLineNo">1234</span> private void loadMeta() throws IOException {<a name="line.1234"></a> +<span class="sourceLineNo">1235</span> // TODO: use a thread pool<a name="line.1235"></a> +<span class="sourceLineNo">1236</span> regionStateStore.visitMeta(new RegionStateStore.RegionStateVisitor() {<a name="line.1236"></a> +<span class="sourceLineNo">1237</span> @Override<a name="line.1237"></a> +<span class="sourceLineNo">1238</span> public void visitRegionState(final RegionInfo regionInfo, final State state,<a name="line.1238"></a> +<span class="sourceLineNo">1239</span> final ServerName regionLocation, final ServerName lastHost, final long openSeqNum) {<a name="line.1239"></a> +<span class="sourceLineNo">1240</span> final RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo);<a name="line.1240"></a> +<span class="sourceLineNo">1241</span> State localState = state;<a name="line.1241"></a> +<span class="sourceLineNo">1242</span> if (localState == null) {<a name="line.1242"></a> +<span class="sourceLineNo">1243</span> // No region state column data in hbase:meta table! Are I doing a rolling upgrade from<a name="line.1243"></a> +<span class="sourceLineNo">1244</span> // hbase1 to hbase2? Am I restoring a SNAPSHOT or otherwise adding a region to hbase:meta?<a name="line.1244"></a> +<span class="sourceLineNo">1245</span> // In any of these cases, state is empty. For now, presume OFFLINE but there are probably<a name="line.1245"></a> +<span class="sourceLineNo">1246</span> // cases where we need to probe more to be sure this correct; TODO informed by experience.<a name="line.1246"></a> +<span class="sourceLineNo">1247</span> LOG.info(regionInfo.getEncodedName() + " regionState=null; presuming " + State.OFFLINE);<a name="line.1247"></a> +<span class="sourceLineNo">1248</span> localState = State.OFFLINE;<a name="line.1248"></a> +<span class="sourceLineNo">1249</span> }<a name="line.1249"></a> +<span class="sourceLineNo">1250</span> synchronized (regionNode) {<a name="line.1250"></a> +<span class="sourceLineNo">1251</span> if (!regionNode.isInTransition()) {<a name="line.1251"></a> +<span class="sourceLineNo">1252</span> regionNode.setState(localState);<a name="line.1252"></a> +<span class="sourceLineNo">1253</span> regionNode.setLastHost(lastHost);<a name="line.1253"></a> +<span class="sourceLineNo">1254</span> regionNode.setRegionLocation(regionLocation);<a name="line.1254"></a> +<span class="sourceLineNo">1255</span> regionNode.setOpenSeqNum(openSeqNum);<a name="line.1255"></a> +<span class="sourceLineNo">1256</span><a name="line.1256"></a> +<span class="sourceLineNo">1257</span> if (localState == State.OPEN) {<a name="line.1257"></a> +<span class="sourceLineNo">1258</span> assert regionLocation != null : "found null region location for " + regionNode;<a name="line.1258"></a> +<span class="sourceLineNo">1259</span> regionStates.addRegionToServer(regionNode);<a name="line.1259"></a> +<span class="sourceLineNo">1260</span> } else if (localState == State.OFFLINE || regionInfo.isOffline()) {<a name="line.1260"></a> +<span class="sourceLineNo">1261</span> regionStates.addToOfflineRegions(regionNode);<a name="line.1261"></a> +<span class="sourceLineNo">1262</span> } else if (localState == State.CLOSED && getTableStateManager().<a name="line.1262"></a> +<span class="sourceLineNo">1263</span> isTableState(regionNode.getTable(), TableState.State.DISABLED,<a name="line.1263"></a> +<span class="sourceLineNo">1264</span> TableState.State.DISABLING)) {<a name="line.1264"></a> +<span class="sourceLineNo">1265</span> // The region is CLOSED and the table is DISABLED/ DISABLING, there is nothing to<a name="line.1265"></a> +<span class="sourceLineNo">1266</span> // schedule; the region is inert.<a name="line.1266"></a> +<span class="sourceLineNo">1267</span> } else {<a name="line.1267"></a> +<span class="sourceLineNo">1268</span> // These regions should have a procedure in replay<a name="line.1268"></a> +<span class="sourceLineNo">1269</span> regionStates.addRegionInTransition(regionNode, null);<a name="line.1269"></a> +<span class="sourceLineNo">1270</span> }<a name="line.1270"></a> +<span class="sourceLineNo">1271</span> }<a name="line.1271"></a> +<span class="sourceLineNo">1272</span> }<a name="line.1272"></a> +<span class="sourceLineNo">1273</span> }<a name="line.1273"></a> +<span class="sourceLineNo">1274</span> });<a name="line.1274"></a> +<span class="sourceLineNo">1275</span><a name="line.1275"></a> +<span class="sourceLineNo">1276</span> // every assignment is blocked until meta is loaded.<a name="line.1276"></a> +<span class="sourceLineNo">1277</span> wakeMetaLoadedEvent();<a name="line.1277"></a> +<span class="sourceLineNo">1278</span> }<a name="line.1278"></a> +<span class="sourceLineNo">1279</span><a name="line.1279"></a> +<span class="sourceLineNo">1280</span> /**<a name="line.1280"></a> +<span class="sourceLineNo">1281</span> * Look at what is in meta and the list of servers that have checked in and make reconciliation.<a name="line.1281"></a> +<span class="sourceLineNo">1282</span> * We cannot tell definitively the difference between a clean shutdown and a cluster that has<a name="line.1282"></a> +<span class="sourceLineNo">1283</span> * been crashed down. At this stage of a Master startup, they look the same: they have the<a name="line.1283"></a> +<span class="sourceLineNo">1284</span> * same state in hbase:meta. We could do detective work probing ZK and the FS for old WALs to<a name="line.1284"></a> +<span class="sourceLineNo">1285</span> * split but SCP does this already so just let it do its job.<a name="line.1285"></a> +<span class="sourceLineNo">1286</span> * <p>>The profiles of clean shutdown and cluster crash-down are the same because on clean<a name="line.1286"></a> +<span class="sourceLineNo">1287</span> * shutdown currently, we do not update hbase:meta with region close state (In AMv2, region<a name="line.1287"></a> +<span class="sourceLineNo">1288</span> * state is kept in hbse:meta). Usually the master runs all region transitions as of AMv2 but on<a name="line.1288"></a> +<span class="sourceLineNo">1289</span> * cluster controlled shutdown, the RegionServers close all their regions only reporting the<a name="line.1289"></a> +<span class="sourceLineNo">1290</span> * final change to the Master. Currently this report is ignored. Later we could take it and<a name="line.1290"></a> +<span class="sourceLineNo">1291</span> * update as many regions as we can before hbase:meta goes down or have the master run the<a name="line.1291"></a> +<span class="sourceLineNo">1292</span> * close of all regions out on the cluster but we may never be able to achieve the proper state on<a name="line.1292"></a> +<span class="sourceLineNo">1293</span> * all regions (at least not w/o lots of painful manipulations and waiting) so clean shutdown<a name="line.1293"></a> +<span class="sourceLineNo">1294</span> * might not be possible especially on big clusters.... And clean shutdown will take time. Given<a name="line.1294"></a> +<span class="sourceLineNo">1295</span> * this current state of affairs, we just run ServerCrashProcedure in both cases. It will always<a name="line.1295"></a> +<span class="sourceLineNo">1296</span> * do the right thing.<a name="line.1296"></a> +<span class="sourceLineNo">1297</span> * @return True if for sure this is a failover where a Master is starting up into an already<a name="line.1297"></a> +<span class="sourceLineNo">1298</span> * running cluster.<a name="line.1298"></a> +<span class="sourceLineNo">1299</span> */<a name="line.1299"></a> +<span class="sourceLineNo">1300</span> // The assumption here is that if RSs are crashing while we are executing this<a name="line.1300"></a> +<span class="sourceLineNo">1301</span> // they will be handled by the SSH that are put in the ServerManager deadservers "queue".<a name="line.1301"></a> +<span class="sourceLineNo">1302</span> private boolean processofflineServersWithOnlineRegions() {<a name="line.1302"></a> +<span class="sourceLineNo">1303</span> boolean deadServers = !master.getServerManager().getDeadServers().isEmpty();<a name="line.1303"></a> +<span class="sourceLineNo">1304</span> final Set<ServerName> offlineServersWithOnlineRegions = new HashSet<>();<a name="line.1304"></a> +<span class="sourceLineNo">1305</span> int size = regionStates.getRegionStateNodes().size();<a name="line.1305"></a> +<span class="sourceLineNo">1306</span> final List<RegionInfo> offlineRegionsToAssign = new ArrayList<>(size);<a name="line.1306"></a> +<span class="sourceLineNo">1307</span> // If deadservers then its a failover, else, we are not sure yet.<a name="line.1307"></a> +<span class="sourceLineNo">1308</span> boolean failover = deadServers;<a name="line.1308"></a> +<span class="sourceLineNo">1309</span> for (RegionStateNode regionNode: regionStates.getRegionStateNodes()) {<a name="line.1309"></a> +<span class="sourceLineNo">1310</span> // Region State can be OPEN even if we did controlled cluster shutdown; Master does not close<a name="line.1310"></a> +<span class="sourceLineNo">1311</span> // the regions in this case. The RegionServer does the close so hbase:meta is state in<a name="line.1311"></a> +<span class="sourceLineNo">1312</span> // hbase:meta is not updated -- Master does all updates -- and is left with OPEN as region<a name="line.1312"></a> +<span class="sourceLineNo">1313</span> // state in meta. How to tell difference between ordered shutdown and crashed-down cluster<a name="line.1313"></a> +<span class="sourceLineNo">1314</span> // then? We can't. Not currently. Perhaps if we updated hbase:meta with CLOSED on ordered<a name="line.1314"></a> +<span class="sourceLineNo">1315</span> // shutdown. This would slow shutdown though and not all edits would make it in anyways.<a name="line.1315"></a> +<span class="sourceLineNo">1316</span> // TODO: Examine.<a name="line.1316"></a> +<span class="sourceLineNo">1317</span> // Because we can't be sure it an ordered shutdown, we run ServerCrashProcedure always.<a name="line.1317"></a> +<span class="sourceLineNo">1318</span> // ServerCrashProcedure will try to retain old deploy when it goes to assign.<a name="line.1318"></a> +<span class="sourceLineNo">1319</span> if (regionNode.getState() == State.OPEN) {<a name="line.1319"></a> +<span class="sourceLineNo">1320</span> final ServerName serverName = regionNode.getRegionLocation();<a name="line.1320"></a> +<span class="sourceLineNo">1321</span> if (!master.getServerManager().isServerOnline(serverName)) {<a name="line.1321"></a> +<span class="sourceLineNo">1322</span> offlineServersWithOnlineRegions.add(serverName);<a name="line.1322"></a> +<span class="sourceLineNo">1323</span> } else {<a name="line.1323"></a> +<span class="sourceLineNo">1324</span> // Server is online. This a failover. Master is starting into already-running cluster.<a name="line.1324"></a> +<span class="sourceLineNo">1325</span> failover = true;<a name="line.1325"></a> +<span class="sourceLineNo">1326</span> }<a name="line.1326"></a> +<span class="sourceLineNo">1327</span> } else if (regionNode.getState() == State.OFFLINE) {<a name="line.1327"></a> +<span class="sourceLineNo">1328</span> if (isTableEnabled(regionNode.getTable())) {<a name="line.1328"></a> +<span class="sourceLineNo">1329</span> offlineRegionsToAssign.add(regionNode.getRegionInfo());<a name="line.1329"></a> +<span class="sourceLineNo">1330</span> }<a name="line.1330"></a> +<span class="sourceLineNo">1331</span> }<a name="line.1331"></a> +<span class="sourceLineNo">1332</span> }<a name="line.1332"></a> +<span class="sourceLineNo">1333</span> // Kill servers with online regions just-in-case. Runs ServerCrashProcedure.<a name="line.1333"></a> +<span class="sourceLineNo">1334</span> for (ServerName serverName: offlineServersWithOnlineRegions) {<a name="line.1334"></a> +<span class="sourceLineNo">1335</span> if (!master.getServerManager().isServerOnline(serverName)) {<a name="line.1335"></a> +<span class="sourceLineNo">1336</span> LOG.info("KILL RegionServer=" + serverName + " hosting regions but not online.");<a name="line.1336"></a> +<span class="sourceLineNo">1337</span> killRegionServer(serverName);<a name="line.1337"></a> +<span class="sourceLineNo">1338</span> }<a name="line.1338"></a> +<span class="sourceLineNo">1339</span> }<a name="line.1339"></a> +<span class="sourceLineNo">1340</span> setFailoverCleanupDone(true);<a name="line.1340"></a> +<span class="sourceLineNo">1341</span><a name="line.1341"></a> +<span class="sourceLineNo">1342</span> // Assign offline regions. Uses round-robin.<a name="line.1342"></a> +<span class="sourceLineNo">1343</span> if (offlineRegionsToAssign.size() > 0) {<a name="line.1343"></a> +<span class="sourceLineNo">1344</span> master.getMasterProcedureExecutor().submitProcedures(master.getAssignmentManager().<a name="line.1344"></a> +<span class="sourceLineNo">1345</span> createRoundRobinAssignProcedures(offlineRegionsToAssign));<a name="line.1345"></a> +<span class="sourceLineNo">1346</span> }<a name="line.1346"></a> <span class="sourceLineNo">1347</span><a name="line.1347"></a> -<span class="sourceLineNo">1348</span> /**<a name="line.1348"></a> -<span class="sourceLineNo">1349</span> * Used by ServerCrashProcedure to make sure AssignmentManager has completed<a name="line.1349"></a> -<span class="sourceLineNo">1350</span> * the failover cleanup before re-assigning regions of dead servers. So that<a name="line.1350"></a> -<span class="sourceLineNo">1351</span> * when re-assignment happens, AssignmentManager has proper region states.<a name="line.1351"></a> -<span class="sourceLineNo">1352</span> */<a name="line.1352"></a> -<span class="sourceLineNo">1353</span> public boolean isFailoverCleanupDone() {<a name="line.1353"></a> -<span class="sourceLineNo">1354</span> return failoverCleanupDone.isReady();<a name="line.1354"></a> -<span class="sourceLineNo">1355</span> }<a name="line.1355"></a> -<span class="sourceLineNo">1356</span><a name="line.1356"></a> -<span class="sourceLineNo">1357</span> /**<a name="line.1357"></a> -<span class="sourceLineNo">1358</span> * Used by ServerCrashProcedure tests verify the ability to suspend the<a name="line.1358"></a> -<span class="sourceLineNo">1359</span> * execution of the ServerCrashProcedure.<a name="line.1359"></a> -<span class="sourceLineNo">1360</span> */<a name="line.1360"></a> -<span class="sourceLineNo">1361</span> @VisibleForTesting<a name="line.1361"></a> -<span class="sourceLineNo">1362</span> public void setFailoverCleanupDone(final boolean b) {<a name="line.1362"></a> -<span class="sourceLineNo">1363</span> master.getMasterProcedureExecutor().getEnvironment()<a name="line.1363"></a> -<span class="sourceLineNo">1364</span> .setEventReady(failoverCleanupDone, b);<a name="line.1364"></a> -<span class="sourceLineNo">1365</span> }<a name="line.1365"></a> -<span class="sourceLineNo">1366</span><a name="line.1366"></a> -<span class="sourceLineNo">1367</span> public ProcedureEvent getFailoverCleanupEvent() {<a name="line.1367"></a> -<span class="sourceLineNo">1368</span> return failoverCleanupDone;<a name="line.1368"></a> -<span class="sourceLineNo">1369</span> }<a name="line.1369"></a> -<span class="sourceLineNo">1370</span><a name="line.1370"></a> -<span class="sourceLineNo">1371</span> /**<a name="line.1371"></a> -<span class="sourceLineNo">1372</span> * Used to check if the failover cleanup is done.<a name="line.1372"></a> -<span class="sourceLineNo">1373</span> * if not we throw PleaseHoldException since we are rebuilding the RegionStates<a name="line.1373"></a> -<span class="sourceLineNo">1374</span> * @param hri region to check if it is already rebuild<a name="line.1374"></a> -<span class="sourceLineNo">1375</span> * @throws PleaseHoldException if the failover cleanup is not completed<a name="line.1375"></a> -<span class="sourceLineNo">1376</span> */<a name="line.1376"></a> -<span class="sourceLineNo">1377</span> private void checkFailoverCleanupCompleted(final RegionInfo hri) throws PleaseHoldException {<a name="line.1377"></a> -<span class="sourceLineNo">1378</span> if (!isRunning()) {<a name="line.1378"></a> -<span class="sourceLineNo">1379</span> throw new PleaseHoldException("AssignmentManager not running");<a name="line.1379"></a> -<span class="sourceLineNo">1380</span> }<a name="line.1380"></a> -<span class="sourceLineNo">1381</span><a name="line.1381"></a> -<span class="sourceLineNo">1382</span> // TODO: can we avoid throwing an exception if hri is already loaded?<a name="line.1382"></a> -<span class="sourceLineNo">1383</span> // at the moment we bypass only meta<a name="line.1383"></a> -<span class="sourceLineNo">1384</span> boolean meta = isMetaRegion(hri);<a name="line.1384"></a> -<span class="sourceLineNo">1385</span> boolean cleanup = isFailoverCleanupDone();<a name="line.1385"></a> -<span class="sourceLineNo">1386</span> if (!isMetaRegion(hri) && !isFailoverCleanupDone()) {<a name="line.1386"></a> -<span class="sourceLineNo">1387</span> String msg = "Master not fully online; hbase:meta=" + meta + ", failoverCleanup=" + cleanup;<a name="line.1387"></a> -<span class="sourceLineNo">1388</span> throw new PleaseHoldException(msg);<a name="line.1388"></a> -<span class="sourceLineNo">1389</span> }<a name="line.1389"></a> -<span class="sourceLineNo">1390</span> }<a name="line.1390"></a> -<span class="sourceLineNo">1391</span><a name="line.1391"></a> -<span class="sourceLineNo">1392</span> // ============================================================================================<a name="line.1392"></a> -<span class="sourceLineNo">1393</span> // TODO: Metrics<a name="line.1393"></a> -<span class="sourceLineNo">1394</span> // ============================================================================================<a name="line.1394"></a> -<span class="sourceLineNo">1395</span> public int getNumRegionsOpened() {<a name="line.1395"></a> -<span class="sourceLineNo">1396</span> // TODO: Used by TestRegionPlacement.java and assume monotonically increasing value<a name="line.1396"></a> -<span class="sourceLineNo">1397</span> return 0;<a name="line.1397"></a> -<span class="sourceLineNo">1398</span> }<a name="line.1398"></a> -<span class="sourceLineNo">1399</span><a name="line.1399"></a> -<span class="sourceLineNo">1400</span> public void submitServerCrash(final ServerName serverName, final boolean shouldSplitWal) {<a name="line.1400"></a> -<span class="sourceLineNo">1401</span> boolean carryingMeta = isCarryingMeta(serverName);<a name="line.1401"></a> -<span class="sourceLineNo">1402</span> ProcedureExecutor<MasterProcedureEnv> procExec = this.master.getMasterProcedureExecutor();<a name="line.1402"></a> -<span class="sourceLineNo">1403</span> procExec.submitProcedure(new ServerCrashProcedure(procExec.getEnvironment(), serverName,<a name="line.1403"></a> -<span class="sourceLineNo">1404</span> shouldSplitWal, carryingMeta));<a name="line.1404"></a> -<span class="sourceLineNo">1405</span> LOG.debug("Added=" + serverName +<a name="line.1405"></a> -<span class="sourceLineNo">1406</span> " to dead servers, submitted shutdown handler to be executed meta=" + carryingMeta);<a name="line.1406"></a> -<span class="sourceLineNo">1407</span> }<a name="line.1407"></a> -<span class="sourceLineNo">1408</span><a name="line.1408"></a> -<span class="sourceLineNo">1409</span> public void offlineRegion(final RegionInfo regionInfo) {<a name="line.1409"></a> -<span class="sourceLineNo">1410</span> // TODO used by MasterRpcServices ServerCrashProcedure<a name="line.1410"></a> -<span class="sourceLineNo">1411</span> final RegionStateNode node = regionStates.getRegionStateNode(regionInfo);<a name="line.1411"></a> -<span class="sourceLineNo">1412</span> if (node != null) node.offline();<a name="line.1412"></a> -<span class="sourceLineNo">1413</span> }<a name="line.1413"></a> -<span class="sourceLineNo">1414</span><a name="line.1414"></a> -<span class="sourceLineNo">1415</span> public void onlineRegion(final RegionInfo regionInfo, final ServerName serverName) {<a name="line.1415"></a> -<span class="sourceLineNo">1416</span> // TODO used by TestSplitTransactionOnCluster.java<a name="line.1416"></a> -<span class="sourceLineNo">1417</span> }<a name="line.1417"></a> -<span class="sourceLineNo">1418</span><a name="line.1418"></a> -<span class="sourceLineNo">1419</span> public Map<ServerName, List<RegionInfo>> getSnapShotOfAssignment(<a name="line.1419"></a> -<span class="sourceLineNo">1420</span> final Collection<RegionInfo> regions) {<a name="line.1420"></a> -<span class="sourceLineNo">1421</span> return regionStates.getSnapShotOfAssignment(regions);<a name="line.1421"></a> -<span class="sourceLineNo">1422</span> }<a name="line.1422"></a> -<span class="sourceLineNo">1423</span><a name="line.1423"></a> -<span class="sourceLineNo">1424</span> // ============================================================================================<a name="line.1424"></a> -<span class="sourceLineNo">1425</span> // TODO: UTILS/HELPERS?<a name="line.1425"></a> -<span class="sourceLineNo">1426</span> // ============================================================================================<a name="line.1426"></a> -<span class="sourceLineNo">1427</span> /**<a name="line.1427"></a> -<span class="sourceLineNo">1428</span> * Used by the client (via master) to identify if all regions have the schema updates<a name="line.1428"></a> -<span class="sourceLineNo">1429</span> *<a name="line.1429"></a> -<span class="sourceLineNo">1430</span> * @param tableName<a name="line.1430"></a> -<span class="sourceLineNo">1431</span> * @return Pair indicating the status of the alter command (pending/total)<a name="line.1431"></a> -<span class="sourceLineNo">1432</span> * @throws IOException<a name="line.1432"></a> -<span class="sourceLineNo">1433</span> */<a name="line.1433"></a> -<span class="sourceLineNo">1434</span> public Pair<Integer, Integer> getReopenStatus(TableName tableName) {<a name="line.1434"></a> -<span class="sourceLineNo">1435</span> if (isTableDisabled(tableName)) return new Pair<Integer, Integer>(0, 0);<a name="line.1435"></a> -<span class="sourceLineNo">1436</span><a name="line.1436"></a> -<span class="sourceLineNo">1437</span> final List<RegionState> states = regionStates.getTableRegionStates(tableName);<a name="line.1437"></a> -<span class="sourceLineNo">1438</span> int ritCount = 0;<a name="line.1438"></a> -<span class="sourceLineNo">1439</span> for (RegionState regionState: states) {<a name="line.1439"></a> -<span class="sourceLineNo">1440</span> if (!regionState.isOpened()) ritCount++;<a name="line.1440"></a> -<span class="sourceLineNo">1441</span> }<a name="line.1441"></a> -<span class="sourceLineNo">1442</span> return new Pair<Integer, Integer>(ritCount, states.size());<a name="line.1442"></a> -<span class="sourceLineNo">1443</span> }<a name="line.1443"></a> -<span class="sourceLineNo">1444</span><a name="line.1444"></a> -<span class="sourceLineNo">1445</span> // ============================================================================================<a name="line.1445"></a> -<span class="sourceLineNo">1446</span> // TODO: Region State In Transition<a name="line.1446"></a> -<span class="sourceLineNo">1447</span> // ============================================================================================<a name="line.1447"></a> -<span class="sourceLineNo">1448</span> protected boolean addRegionInTransition(final RegionStateNode regionNode,<a name="line.1448"></a> -<span class="sourceLineNo">1449</span> final RegionTransitionProcedure procedure) {<a name="line.1449"></a> -<span class="sourceLineNo">1450</span> return regionStates.addRegionInTransition(regionNode, procedure);<a name="line.1450"></a> -<span class="sourceLineNo">1451</span> }<a name="line.1451"></a> -<span class="sourceLineNo">1452</span><a name="line.1452"></a> -<span class="sourceLineNo">1453</span> protected void removeRegionInTransition(final RegionStateNode regionNode,<a name="line.1453"></a> -<span class="sourceLineNo">1454</span> final RegionTransitionProcedure procedure) {<a name="line.1454"></a> -<span class="sourceLineNo">1455</span> regionStates.removeRegionInTransition(regionNode, procedure);<a name="line.1455"></a> -<span class="sourceLineNo">1456</span> }<a name="line.1456"></a> -<span class="sourceLineNo">1457</span><a name="line.1457"></a> -<span class="sourceLineNo">1458</span> public boolean hasRegionsInTransition() {<a name="line.1458"></a> -<span class="sourceLineNo">1459</span> return regionStates.hasRegionsInTransition();<a name="line.1459"></a> -<span class="sourceLineNo">1460</span> }<a name="line.1460"></a> -<span class="sourceLineNo">1461</span><a name="line.1461"></a> -<span class="sourceLineNo">1462</span> public List<RegionStateNode> getRegionsInTransition() {<a name="line.1462"></a> -<span class="sourceLineNo">1463</span> return regionStates.getRegionsInTransition();<a name="line.1463"></a> -<span class="sourceLineNo">1464</span> }<a name="line.1464"></a> -<span class="sourceLineNo">1465</span><a name="line.1465"></a> -<span class="sourceLineNo">1466</span> public List<RegionInfo> getAssignedRegions() {<a name="line.1466"></a> -<span class="sourceLineNo">1467</span> return regionStates.getAssignedRegions();<a name="line.1467"></a> -<span class="sourceLineNo">1468</span> }<a name="line.1468"></a> -<span class="sourceLineNo">1469</span><a name="line.1469"></a> -<span class="sourceLineNo">1470</span> public RegionInfo getRegionInfo(final byte[] regionName) {<a name="line.1470"></a> -<span class="sourceLineNo">1471</span> final RegionStateNode regionState = regionStates.getRegionStateNodeFromName(regionName);<a name="line.1471"></a> -<span class="sourceLineNo">1472</span> return regionState != null ? regionState.getRegionInfo() : null;<a name="line.1472"></a> -<span class="sourceLineNo">1473</span> }<a name="line.1473"></a> -<span class="sourceLineNo">1474</span><a name="line.1474"></a> -<span class="sourceLineNo">1475</span> // ============================================================================================<a name="line.1475"></a> -<span class="sourceLineNo">1476</span> // TODO: Region Status update<a name="line.1476"></a> -<span class="sourceLineNo">1477</span> // ============================================================================================<a name="line.1477"></a> -<span class="sourceLineNo">1478</span> private void sendRegionOpenedNotification(final RegionInfo regionInfo,<a name="line.1478"></a> -<span class="sourceLineNo">1479</span> final ServerName serverName) {<a name="line.1479"></a> -<span class="sourceLineNo">1480</span> getBalancer().regionOnline(regionInfo, serverName);<a name="line.1480"></a> -<span class="sourceLineNo">1481</span> if (!this.listeners.isEmpty()) {<a name="line.1481"></a> -<span class="sourceLineNo">1482</span> for (AssignmentListener listener : this.listeners) {<a name="line.1482"></a> -<span class="sourceLineNo">1483</span> listener.regionOpened(regionInfo, serverName);<a name="line.1483"></a> -<span class="sourceLineNo">1484</span> }<a name="line.1484"></a> -<span class="sourceLineNo">1485</span> }<a name="line.1485"></a> -<span class="sourceLineNo">1486</span> }<a name="line.1486"></a> -<span class="sourceLineNo">1487</span><a name="line.1487"></a> -<span class="sourceLineNo">1488</span> private void sendRegionClosedNotification(final RegionInfo regionInfo) {<a name="line.1488"></a> -<span class="sourceLineNo">1489</span> getBalancer().regionOffline(regionInfo);<a name="line.1489"></a> -<span class="sourceLineNo">1490</span> if (!this.listeners.isEmpty()) {<a name="line.1490"></a> -<span class="sourceLineNo">1491</span> for (AssignmentListener listener : this.listeners) {<a name="line.1491"></a> -<span class="sourceLineNo">1492</span> listener.regionClosed(regionInfo);<a name="line.1492"></a> -<span class="sourceLineNo">1493</span> }<a name="line.1493"></a> -<span class="sourceLineNo">1494</span> }<a name="line.1494"></a> -<span class="sourceLineNo">1495</span> }<a name="line.1495"></a> -<span class="sourceLineNo">1496</span><a name="line.1496"></a> -<span class="sourceLineNo">1497</span> public void markRegionAsOpening(final RegionStateNode regionNode) throws IOException {<a name="line.1497"></a> -<span class="sourceLineNo">1498</span> synchronized (regionNode) {<a name="line.1498"></a> -<span class="sourceLineNo">1499</span> regionNode.transitionState(State.OPENING, RegionStates.STATES_EXPECTED_ON_OPEN);<a name="line.1499"></a> -<span class="sourceLineNo">1500</span> regionStates.addRegionToServer(regionNode);<a name="line.1500"></a> -<span class="sourceLineNo">1501</span> regionStateStore.updateRegionLocation(regionNode);<a name="line.1501"></a> -<span class="sourceLineNo">1502</span> }<a name="line.1502"></a> -<span class="sourceLineNo">1503</span><a name="line.1503"></a> -<span class="sourceLineNo">1504</span> // update the operation count metrics<a name="line.1504"></a> -<span class="sourceLineNo">1505</span> metrics.incrementOperationCounter();<a name="line.1505"></a> -<span class="sourceLineNo">1506</span> }<a name="line.1506"></a> -<span class="sourceLineNo">1507</span><a name="line.1507"></a> -<span class="sourceLineNo">1508</span> public void undoRegionAsOpening(final RegionStateNode regionNode) {<a name="line.1508"></a> -<span class="sourceLineNo">1509</span> boolean opening = false;<a name="line.1509"></a> -<span class="sourceLineNo">1510</span> synchronized (regionNode) {<a name="line.1510"></a> -<span class="sourceLineNo">1511</span> if (regionNode.isInState(State.OPENING)) {<a name="line.1511"></a> -<span class="sourceLineNo">1512</span> opening = true;<a name="line.1512"></a> -<span class="sourceLineNo">1513</span> regionStates.removeRegionFromServer(regionNode.getRegionLocation(), regionNode);<a name="line.1513"></a> -<span class="sourceLineNo">1514</span> }<a name="line.1514"></a> -<span class="sourceLineNo">1515</span> // Should we update hbase:meta?<a name="line.1515"></a> -<span class="sourceLineNo">1516</span> }<a name="line.1516"></a> -<span class="sourceLineNo">1517</span> if (opening) {<a name="line.1517"></a> -<span class="sourceLineNo">1518</span> // TODO: Metrics. Do opposite of metrics.incrementOperationCounter();<a name="line.1518"></a> +<span class="sourceLineNo">1348</span> return failover;<a name="line.1348"></a> +<span class="sourceLineNo">1349</span> }<a name="line.1349"></a> +<span class="sourceLineNo">1350</span><a name="line.1350"></a> +<span class="sourceLineNo">1351</span> /**<a name="line.1351"></a> +<span class="sourceLineNo">1352</span> * Used by ServerCrashProcedure to make sure AssignmentManager has completed<a name="line.1352"></a> +<span class="sourceLineNo">1353</span> * the failover cleanup before re-assigning regions of dead servers. So that<a name="line.1353"></a> +<span class="sourceLineNo">1354</span> * when re-assignment happens, AssignmentManager has proper region states.<a name="line.1354"></a> +<span class="sourceLineNo">1355</span> */<a name="line.1355"></a> +<span class="sourceLineNo">1356</span> public boolean isFailoverCleanupDone() {<a name="line.1356"></a> +<span class="sourceLineNo">1357</span> return failoverCleanupDone.isReady();<a name="line.1357"></a> +<span class="sourceLineNo">1358</span> }<a name="line.1358"></a> +<span class="sourceLineNo">1359</span><a name="line.1359"></a> +<span class="sourceLineNo">1360</span> /**<a name="line.1360"></a> +<span class="sourceLineNo">1361</span> * Used by ServerCrashProcedure tests verify the ability to suspend the<a name="line.1361"></a> +<span class="sourceLineNo">1362</span> * execution of the ServerCrashProcedure.<a name="line.1362"></a> +<span class="sourceLineNo">1363</span> */<a name="line.1363"></a> +<span class="sourceLineNo">1364</span> @VisibleForTesting<a name="line.1364"></a> +<span class="sourceLineNo">1365</span> public void setFailoverCleanupDone(final boolean b) {<a name="line.1365"></a> +<span class="sourceLineNo">1366</span> master.getMasterProcedureExecutor().getEnvironment()<a name="line.1366"></a> +<span class="sourceLineNo">1367</span> .setEventReady(failoverCleanupDone, b);<a name="line.1367"></a> +<span class="sourceLineNo">1368</span> }<a name="line.1368"></a> +<span class="sourceLineNo">1369</span><a name="line.1369"></a> +<span class="sourceLineNo">1370</span> public ProcedureEvent getFailoverCleanupEvent() {<a name="line.1370"></a> +<span class="sourceLineNo">1371</span> return failoverCleanupDone;<a name="line.1371"></a> +<span class="sourceLineNo">1372</span> }<a name="line.1372"></a> +<span class="sourceLineNo">1373</span><a name="line.1373"></a> +<span class="sourceLineNo">1374</span> /**<a name="line.1374"></a> +<span class="sourceLineNo">1375</span> * Used to check if the failover cleanup is done.<a name="line.1375"></a> +<span class="sourceLineNo">1376</span> * if not we throw PleaseHoldException since we are rebuilding the RegionStates<a name="line.1376"></a> +<span class="sourceLineNo">1377</span> * @param hri region to check if it is already rebuild<a name="line.1377"></a> +<span class="sourceLineNo">1378</span> * @throws PleaseHoldException if the failover cleanup is not completed<a name="line.1378"></a> +<span class="sourceLineNo">1379</span> */<a name="line.1379"></a> +<span class="sourceLineNo">1380</span> private void checkFailoverCleanupCompleted(final RegionInfo hri) throws PleaseHoldException {<a name="line.1380"></a> +<span class="sourceLineNo">1381</span> if (!isRunning()) {<a name="line.1381"></a> +<span class="sourceLineNo">1382</span> throw new PleaseHoldException("AssignmentManager not running");<a name="line.1382"></a> +<span class="sourceLineNo">1383</span> }<a name="line.1383"></a> +<span class="sourceLineNo">1384</span><a name="line.1384"></a> +<span class="sourceLineNo">1385</span> // TODO: can we avoid throwing an exception if hri is already loaded?<a name="line.1385"></a> +<span class="sourceLineNo">1386</span> // at the moment we bypass only meta<a name="line.1386"></a> +<span class="sourceLineNo">1387</span> boolean meta = isMetaRegion(hri);<a name="line.1387"></a> +<span class="sourceLineNo">1388</span> boolean cleanup = isFailoverCleanupDone();<a name="line.1388"></a> +<span class="sourceLineNo">1389</span> if (!isMetaRegion(hri) && !isFailoverCleanupDone()) {<a name="line.1389"></a> +<span class="sourceLineNo">1390</span> String msg = "Master not fully online; hbase:meta=" + meta + ", failoverCleanup=" + cleanup;<a name="line.1390"></a> +<span class="sourceLineNo">1391</span> throw new PleaseHoldException(msg);<a name="line.1391"></a> +<span class="sourceLineNo">1392</span> }<a name="line.1392"></a> +<span class="sourceLineNo">1393</span> }<a name="line.1393"></a> +<span class="sourceLineNo">1394</span><a name="line.1394"></a> +<span class="sourceLineNo">1395</span> // ============================================================================================<a name="line.1395"></a> +<span class="sourceLineNo">1396</span> // TODO: Metrics<a name="line.1396"></a> +<span class="sourceLineNo">1397</span> // ============================================================================================<a name="line.1397"></a> +<span class="sourceLineNo">1398</span> public int getNumRegionsOpened() {<a name="line.1398"></a> +<span class="sourceLineNo">1399</span> // TODO: Used by TestRegionPlacement.java and assume monotonically increasing value<a name="line.1399"></a> +<span class="sourceLineNo">1400</span> return 0;<a name="line.1400"></a> +<span class="sourceLineNo">1401</span> }<a name="line.1401"></a> +<span class="sourceLineNo">1402</span><a name="line.1402"></a> +<span class="sourceLineNo">1403</span> public void submitServerCrash(final ServerName serverName, final boolean shouldSplitWal) {<a name="line.1403"></a> +<span class="sourceLineNo">1404</span> boolean carryingMeta = isCarryingMeta(serverName);<a name="line.1404"></a> +<span class="sourceLineNo">1405</span> ProcedureExecutor<MasterProcedureEnv> procExec = this.master.getMasterProcedureExecutor();<a name="line.1405"></a> +<span class="sourceLineNo">1406</span> procExec.submitProcedure(new ServerCrashProcedure(procExec.getEnvironment(), serverName,<a name="line.1406"></a> +<span class="sourceLineNo">1407</span> shouldSplitWal, carryingMeta));<a name="line.1407"></a> +<span class="sourceLineNo">1408</span> LOG.debug("Added=" + serverName +<a name="line.1408"></a> +<span class="sourceLineNo">1409</span> " to dead servers, submitted shutdown handler to be executed meta=" + carryingMeta);<a name="line.1409"></a> +<span class="sourceLineNo">1410</span> }<a name="line.1410"></a> +<span class="sourceLineNo">1411</span><a name="line.1411"></a> +<span class="sourceLineNo">1412</span> public void offlineRegion(final RegionInfo regionInfo) {<a name="line.1412"></a> +<span class="sourceLineNo">1413</span> // TODO used by MasterRpcServices ServerCrashProcedure<a name="line.1413"></a> +<span class="sourceLineNo">1414</span> final RegionStateNode node = regionStates.getRegionStateNode(regionInfo);<a name="line.1414"></a> +<span class="sourceLineNo">1415</span> if (node != null) node.offline();<a name="line.1415"></a> +<span class="sourceLineNo">1416</span> }<a name="line.1416"></a> +<span class="sourceLineNo">1417</span><a name="line.1417"></a> +<span class="sourceLineNo">1418</span> public void onlineRegion(final RegionInfo regionInfo, final ServerName serverName) {<a name="line.1418"></a> +<span class="sourceLineNo">1419</span> // TODO used by TestSplitTransactionOnCluster.java<a name="line.1419"></a> +<span class="sourceLineNo">1420</span> }<a name="line.1420"></a> +<span class="sourceLineNo">1421</span><a name="line.1421"></a> +<span class="sourceLineNo">1422</span> public Map<ServerName, List<RegionInfo>> getSnapShotOfAssignment(<a name="line.1422"></a> +<span class="sourceLineNo">1423</span> final Collection<RegionInfo> regions) {<a name="line.1423"></a> +<span class="sourceLineNo">1424</span> return regionStates.getSnapShotOfAssignment(regions);<a name="line.1424"></a> +<span class="sourceLineNo">1425</span> }<a name="line.1425"></a> +<span class="sourceLineNo">1426</span><a name="line.1426"></a> +<span class="sourceLineNo">1427</span> // ============================================================================================<a name="line.1427"></a> +<span class="sourceLineNo">1428</span> // TODO: UTILS/HELPERS?<a name="line.1428"></a> +<span class="sourceLineNo">1429</span> // ============================================================================================<a name="line.1429"></a> +<span class="sourceLineNo">1430</span> /**<a name="line.1430"></a> +<span class="sourceLineNo">1431</span> * Used by the client (via master) to identify if all regions have the schema updates<a name="line.1431"></a> +<span class="sourceLineNo">1432</span> *<a name="line.1432"></a> +<span class="sourceLineNo">1433</span> * @param tableName<a name="line.1433"></a> +<span class="sourceLineNo">1434</span> * @return Pair indicating the status of the alter command (pending/total)<a name="line.1434"></a> +<span class="sourceLineNo">1435</span> * @throws IOException<a name="line.1435"></a> +<span class="sourceLineNo">1436</span> */<a name="line.1436"></a> +<span class="sourceLineNo">1437</span> public Pair<Integer, Integer> getReopenStatus(TableName tableName) {<a name="line.1437"></a> +<span class="sourceLineNo">1438</span> if (isTableDisabled(tableName)) return new Pair<Integer, Integer>(0, 0);<a name="line.1438"></a> +<span class="sourceLineNo">1439</span><a name="line.1439"></a> +<span class="sourceLineNo">1440</span> final List<RegionState> states = regionStates.getTableRegionStates(tableName);<a name="line.1440"></a> +<span class="sourceLineNo">1441</span> int ritCount = 0;<a name="line.1441"></a> +<span class="sourceLineNo">1442</span> for (RegionState regionState: states) {<a name="line.1442"></a> +<span class="sourceLineNo">1443</span> if (!regionState.isOpened()) ritCount++;<a name="line
<TRUNCATED>