http://git-wip-us.apache.org/repos/asf/hbase-site/blob/d220bc5e/devapidocs/src-html/org/apache/hadoop/hbase/util/HBaseFsck.HdfsEntry.html ---------------------------------------------------------------------- diff --git a/devapidocs/src-html/org/apache/hadoop/hbase/util/HBaseFsck.HdfsEntry.html b/devapidocs/src-html/org/apache/hadoop/hbase/util/HBaseFsck.HdfsEntry.html index 8302e28..c370eb9 100644 --- a/devapidocs/src-html/org/apache/hadoop/hbase/util/HBaseFsck.HdfsEntry.html +++ b/devapidocs/src-html/org/apache/hadoop/hbase/util/HBaseFsck.HdfsEntry.html @@ -2113,3031 +2113,3033 @@ <span class="sourceLineNo">2105</span> errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,<a name="line.2105"></a> <span class="sourceLineNo">2106</span> tableName + " unable to delete dangling table state " + tableState);<a name="line.2106"></a> <span class="sourceLineNo">2107</span> }<a name="line.2107"></a> -<span class="sourceLineNo">2108</span> } else {<a name="line.2108"></a> -<span class="sourceLineNo">2109</span> errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,<a name="line.2109"></a> -<span class="sourceLineNo">2110</span> tableName + " has dangling table state " + tableState);<a name="line.2110"></a> -<span class="sourceLineNo">2111</span> }<a name="line.2111"></a> -<span class="sourceLineNo">2112</span> }<a name="line.2112"></a> -<span class="sourceLineNo">2113</span> }<a name="line.2113"></a> -<span class="sourceLineNo">2114</span> // check that all tables have states<a name="line.2114"></a> -<span class="sourceLineNo">2115</span> for (TableName tableName : tablesInfo.keySet()) {<a name="line.2115"></a> -<span class="sourceLineNo">2116</span> if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) {<a name="line.2116"></a> -<span class="sourceLineNo">2117</span> if (fixMeta) {<a name="line.2117"></a> -<span class="sourceLineNo">2118</span> MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);<a name="line.2118"></a> -<span class="sourceLineNo">2119</span> TableState newState = MetaTableAccessor.getTableState(connection, tableName);<a name="line.2119"></a> -<span class="sourceLineNo">2120</span> if (newState == null) {<a name="line.2120"></a> -<span class="sourceLineNo">2121</span> errors.reportError(ERROR_CODE.NO_TABLE_STATE,<a name="line.2121"></a> -<span class="sourceLineNo">2122</span> "Unable to change state for table " + tableName + " in meta ");<a name="line.2122"></a> -<span class="sourceLineNo">2123</span> }<a name="line.2123"></a> -<span class="sourceLineNo">2124</span> } else {<a name="line.2124"></a> -<span class="sourceLineNo">2125</span> errors.reportError(ERROR_CODE.NO_TABLE_STATE,<a name="line.2125"></a> -<span class="sourceLineNo">2126</span> tableName + " has no state in meta ");<a name="line.2126"></a> -<span class="sourceLineNo">2127</span> }<a name="line.2127"></a> -<span class="sourceLineNo">2128</span> }<a name="line.2128"></a> -<span class="sourceLineNo">2129</span> }<a name="line.2129"></a> -<span class="sourceLineNo">2130</span> }<a name="line.2130"></a> -<span class="sourceLineNo">2131</span><a name="line.2131"></a> -<span class="sourceLineNo">2132</span> private void preCheckPermission() throws IOException, AccessDeniedException {<a name="line.2132"></a> -<span class="sourceLineNo">2133</span> if (shouldIgnorePreCheckPermission()) {<a name="line.2133"></a> -<span class="sourceLineNo">2134</span> return;<a name="line.2134"></a> -<span class="sourceLineNo">2135</span> }<a name="line.2135"></a> -<span class="sourceLineNo">2136</span><a name="line.2136"></a> -<span class="sourceLineNo">2137</span> Path hbaseDir = FSUtils.getRootDir(getConf());<a name="line.2137"></a> -<span class="sourceLineNo">2138</span> FileSystem fs = hbaseDir.getFileSystem(getConf());<a name="line.2138"></a> -<span class="sourceLineNo">2139</span> UserProvider userProvider = UserProvider.instantiate(getConf());<a name="line.2139"></a> -<span class="sourceLineNo">2140</span> UserGroupInformation ugi = userProvider.getCurrent().getUGI();<a name="line.2140"></a> -<span class="sourceLineNo">2141</span> FileStatus[] files = fs.listStatus(hbaseDir);<a name="line.2141"></a> -<span class="sourceLineNo">2142</span> for (FileStatus file : files) {<a name="line.2142"></a> -<span class="sourceLineNo">2143</span> try {<a name="line.2143"></a> -<span class="sourceLineNo">2144</span> FSUtils.checkAccess(ugi, file, FsAction.WRITE);<a name="line.2144"></a> -<span class="sourceLineNo">2145</span> } catch (AccessDeniedException ace) {<a name="line.2145"></a> -<span class="sourceLineNo">2146</span> LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);<a name="line.2146"></a> -<span class="sourceLineNo">2147</span> errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()<a name="line.2147"></a> -<span class="sourceLineNo">2148</span> + " does not have write perms to " + file.getPath()<a name="line.2148"></a> -<span class="sourceLineNo">2149</span> + ". Please rerun hbck as hdfs user " + file.getOwner());<a name="line.2149"></a> -<span class="sourceLineNo">2150</span> throw ace;<a name="line.2150"></a> -<span class="sourceLineNo">2151</span> }<a name="line.2151"></a> -<span class="sourceLineNo">2152</span> }<a name="line.2152"></a> -<span class="sourceLineNo">2153</span> }<a name="line.2153"></a> -<span class="sourceLineNo">2154</span><a name="line.2154"></a> -<span class="sourceLineNo">2155</span> /**<a name="line.2155"></a> -<span class="sourceLineNo">2156</span> * Deletes region from meta table<a name="line.2156"></a> -<span class="sourceLineNo">2157</span> */<a name="line.2157"></a> -<span class="sourceLineNo">2158</span> private void deleteMetaRegion(HbckInfo hi) throws IOException {<a name="line.2158"></a> -<span class="sourceLineNo">2159</span> deleteMetaRegion(hi.metaEntry.getRegionName());<a name="line.2159"></a> -<span class="sourceLineNo">2160</span> }<a name="line.2160"></a> -<span class="sourceLineNo">2161</span><a name="line.2161"></a> -<span class="sourceLineNo">2162</span> /**<a name="line.2162"></a> -<span class="sourceLineNo">2163</span> * Deletes region from meta table<a name="line.2163"></a> -<span class="sourceLineNo">2164</span> */<a name="line.2164"></a> -<span class="sourceLineNo">2165</span> private void deleteMetaRegion(byte[] metaKey) throws IOException {<a name="line.2165"></a> -<span class="sourceLineNo">2166</span> Delete d = new Delete(metaKey);<a name="line.2166"></a> -<span class="sourceLineNo">2167</span> meta.delete(d);<a name="line.2167"></a> -<span class="sourceLineNo">2168</span> LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );<a name="line.2168"></a> -<span class="sourceLineNo">2169</span> }<a name="line.2169"></a> -<span class="sourceLineNo">2170</span><a name="line.2170"></a> -<span class="sourceLineNo">2171</span> /**<a name="line.2171"></a> -<span class="sourceLineNo">2172</span> * Reset the split parent region info in meta table<a name="line.2172"></a> -<span class="sourceLineNo">2173</span> */<a name="line.2173"></a> -<span class="sourceLineNo">2174</span> private void resetSplitParent(HbckInfo hi) throws IOException {<a name="line.2174"></a> -<span class="sourceLineNo">2175</span> RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());<a name="line.2175"></a> -<span class="sourceLineNo">2176</span> Delete d = new Delete(hi.metaEntry.getRegionName());<a name="line.2176"></a> -<span class="sourceLineNo">2177</span> d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);<a name="line.2177"></a> -<span class="sourceLineNo">2178</span> d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);<a name="line.2178"></a> -<span class="sourceLineNo">2179</span> mutations.add(d);<a name="line.2179"></a> -<span class="sourceLineNo">2180</span><a name="line.2180"></a> -<span class="sourceLineNo">2181</span> RegionInfo hri = RegionInfoBuilder.newBuilder(hi.metaEntry)<a name="line.2181"></a> -<span class="sourceLineNo">2182</span> .setOffline(false)<a name="line.2182"></a> -<span class="sourceLineNo">2183</span> .setSplit(false)<a name="line.2183"></a> -<span class="sourceLineNo">2184</span> .build();<a name="line.2184"></a> -<span class="sourceLineNo">2185</span> Put p = MetaTableAccessor.makePutFromRegionInfo(hri, EnvironmentEdgeManager.currentTime());<a name="line.2185"></a> -<span class="sourceLineNo">2186</span> mutations.add(p);<a name="line.2186"></a> -<span class="sourceLineNo">2187</span><a name="line.2187"></a> -<span class="sourceLineNo">2188</span> meta.mutateRow(mutations);<a name="line.2188"></a> -<span class="sourceLineNo">2189</span> LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );<a name="line.2189"></a> -<span class="sourceLineNo">2190</span> }<a name="line.2190"></a> -<span class="sourceLineNo">2191</span><a name="line.2191"></a> -<span class="sourceLineNo">2192</span> /**<a name="line.2192"></a> -<span class="sourceLineNo">2193</span> * This backwards-compatibility wrapper for permanently offlining a region<a name="line.2193"></a> -<span class="sourceLineNo">2194</span> * that should not be alive. If the region server does not support the<a name="line.2194"></a> -<span class="sourceLineNo">2195</span> * "offline" method, it will use the closest unassign method instead. This<a name="line.2195"></a> -<span class="sourceLineNo">2196</span> * will basically work until one attempts to disable or delete the affected<a name="line.2196"></a> -<span class="sourceLineNo">2197</span> * table. The problem has to do with in-memory only master state, so<a name="line.2197"></a> -<span class="sourceLineNo">2198</span> * restarting the HMaster or failing over to another should fix this.<a name="line.2198"></a> -<span class="sourceLineNo">2199</span> */<a name="line.2199"></a> -<span class="sourceLineNo">2200</span> private void offline(byte[] regionName) throws IOException {<a name="line.2200"></a> -<span class="sourceLineNo">2201</span> String regionString = Bytes.toStringBinary(regionName);<a name="line.2201"></a> -<span class="sourceLineNo">2202</span> if (!rsSupportsOffline) {<a name="line.2202"></a> -<span class="sourceLineNo">2203</span> LOG.warn("Using unassign region " + regionString<a name="line.2203"></a> -<span class="sourceLineNo">2204</span> + " instead of using offline method, you should"<a name="line.2204"></a> -<span class="sourceLineNo">2205</span> + " restart HMaster after these repairs");<a name="line.2205"></a> -<span class="sourceLineNo">2206</span> admin.unassign(regionName, true);<a name="line.2206"></a> -<span class="sourceLineNo">2207</span> return;<a name="line.2207"></a> -<span class="sourceLineNo">2208</span> }<a name="line.2208"></a> -<span class="sourceLineNo">2209</span><a name="line.2209"></a> -<span class="sourceLineNo">2210</span> // first time we assume the rs's supports #offline.<a name="line.2210"></a> -<span class="sourceLineNo">2211</span> try {<a name="line.2211"></a> -<span class="sourceLineNo">2212</span> LOG.info("Offlining region " + regionString);<a name="line.2212"></a> -<span class="sourceLineNo">2213</span> admin.offline(regionName);<a name="line.2213"></a> -<span class="sourceLineNo">2214</span> } catch (IOException ioe) {<a name="line.2214"></a> -<span class="sourceLineNo">2215</span> String notFoundMsg = "java.lang.NoSuchMethodException: " +<a name="line.2215"></a> -<span class="sourceLineNo">2216</span> "org.apache.hadoop.hbase.master.HMaster.offline([B)";<a name="line.2216"></a> -<span class="sourceLineNo">2217</span> if (ioe.getMessage().contains(notFoundMsg)) {<a name="line.2217"></a> -<span class="sourceLineNo">2218</span> LOG.warn("Using unassign region " + regionString<a name="line.2218"></a> -<span class="sourceLineNo">2219</span> + " instead of using offline method, you should"<a name="line.2219"></a> -<span class="sourceLineNo">2220</span> + " restart HMaster after these repairs");<a name="line.2220"></a> -<span class="sourceLineNo">2221</span> rsSupportsOffline = false; // in the future just use unassign<a name="line.2221"></a> -<span class="sourceLineNo">2222</span> admin.unassign(regionName, true);<a name="line.2222"></a> -<span class="sourceLineNo">2223</span> return;<a name="line.2223"></a> -<span class="sourceLineNo">2224</span> }<a name="line.2224"></a> -<span class="sourceLineNo">2225</span> throw ioe;<a name="line.2225"></a> -<span class="sourceLineNo">2226</span> }<a name="line.2226"></a> -<span class="sourceLineNo">2227</span> }<a name="line.2227"></a> -<span class="sourceLineNo">2228</span><a name="line.2228"></a> -<span class="sourceLineNo">2229</span> private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {<a name="line.2229"></a> -<span class="sourceLineNo">2230</span> undeployRegionsForHbi(hi);<a name="line.2230"></a> -<span class="sourceLineNo">2231</span> // undeploy replicas of the region (but only if the method is invoked for the primary)<a name="line.2231"></a> -<span class="sourceLineNo">2232</span> if (hi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {<a name="line.2232"></a> -<span class="sourceLineNo">2233</span> return;<a name="line.2233"></a> -<span class="sourceLineNo">2234</span> }<a name="line.2234"></a> -<span class="sourceLineNo">2235</span> int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();<a name="line.2235"></a> -<span class="sourceLineNo">2236</span> for (int i = 1; i < numReplicas; i++) {<a name="line.2236"></a> -<span class="sourceLineNo">2237</span> if (hi.getPrimaryHRIForDeployedReplica() == null) continue;<a name="line.2237"></a> -<span class="sourceLineNo">2238</span> RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(<a name="line.2238"></a> -<span class="sourceLineNo">2239</span> hi.getPrimaryHRIForDeployedReplica(), i);<a name="line.2239"></a> -<span class="sourceLineNo">2240</span> HbckInfo h = regionInfoMap.get(hri.getEncodedName());<a name="line.2240"></a> -<span class="sourceLineNo">2241</span> if (h != null) {<a name="line.2241"></a> -<span class="sourceLineNo">2242</span> undeployRegionsForHbi(h);<a name="line.2242"></a> -<span class="sourceLineNo">2243</span> //set skip checks; we undeployed it, and we don't want to evaluate this anymore<a name="line.2243"></a> -<span class="sourceLineNo">2244</span> //in consistency checks<a name="line.2244"></a> -<span class="sourceLineNo">2245</span> h.setSkipChecks(true);<a name="line.2245"></a> -<span class="sourceLineNo">2246</span> }<a name="line.2246"></a> -<span class="sourceLineNo">2247</span> }<a name="line.2247"></a> -<span class="sourceLineNo">2248</span> }<a name="line.2248"></a> -<span class="sourceLineNo">2249</span><a name="line.2249"></a> -<span class="sourceLineNo">2250</span> private void undeployRegionsForHbi(HbckInfo hi) throws IOException, InterruptedException {<a name="line.2250"></a> -<span class="sourceLineNo">2251</span> for (OnlineEntry rse : hi.deployedEntries) {<a name="line.2251"></a> -<span class="sourceLineNo">2252</span> LOG.debug("Undeploy region " + rse.hri + " from " + rse.hsa);<a name="line.2252"></a> -<span class="sourceLineNo">2253</span> try {<a name="line.2253"></a> -<span class="sourceLineNo">2254</span> HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.hsa, rse.hri);<a name="line.2254"></a> -<span class="sourceLineNo">2255</span> offline(rse.hri.getRegionName());<a name="line.2255"></a> -<span class="sourceLineNo">2256</span> } catch (IOException ioe) {<a name="line.2256"></a> -<span class="sourceLineNo">2257</span> LOG.warn("Got exception when attempting to offline region "<a name="line.2257"></a> -<span class="sourceLineNo">2258</span> + Bytes.toString(rse.hri.getRegionName()), ioe);<a name="line.2258"></a> -<span class="sourceLineNo">2259</span> }<a name="line.2259"></a> -<span class="sourceLineNo">2260</span> }<a name="line.2260"></a> -<span class="sourceLineNo">2261</span> }<a name="line.2261"></a> -<span class="sourceLineNo">2262</span><a name="line.2262"></a> -<span class="sourceLineNo">2263</span> /**<a name="line.2263"></a> -<span class="sourceLineNo">2264</span> * Attempts to undeploy a region from a region server based in information in<a name="line.2264"></a> -<span class="sourceLineNo">2265</span> * META. Any operations that modify the file system should make sure that<a name="line.2265"></a> -<span class="sourceLineNo">2266</span> * its corresponding region is not deployed to prevent data races.<a name="line.2266"></a> -<span class="sourceLineNo">2267</span> *<a name="line.2267"></a> -<span class="sourceLineNo">2268</span> * A separate call is required to update the master in-memory region state<a name="line.2268"></a> -<span class="sourceLineNo">2269</span> * kept in the AssignementManager. Because disable uses this state instead of<a name="line.2269"></a> -<span class="sourceLineNo">2270</span> * that found in META, we can't seem to cleanly disable/delete tables that<a name="line.2270"></a> -<span class="sourceLineNo">2271</span> * have been hbck fixed. When used on a version of HBase that does not have<a name="line.2271"></a> -<span class="sourceLineNo">2272</span> * the offline ipc call exposed on the master (&lt;0.90.5, &lt;0.92.0) a master<a name="line.2272"></a> -<span class="sourceLineNo">2273</span> * restart or failover may be required.<a name="line.2273"></a> -<span class="sourceLineNo">2274</span> */<a name="line.2274"></a> -<span class="sourceLineNo">2275</span> private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {<a name="line.2275"></a> -<span class="sourceLineNo">2276</span> if (hi.metaEntry == null && hi.hdfsEntry == null) {<a name="line.2276"></a> -<span class="sourceLineNo">2277</span> undeployRegions(hi);<a name="line.2277"></a> -<span class="sourceLineNo">2278</span> return;<a name="line.2278"></a> -<span class="sourceLineNo">2279</span> }<a name="line.2279"></a> -<span class="sourceLineNo">2280</span><a name="line.2280"></a> -<span class="sourceLineNo">2281</span> // get assignment info and hregioninfo from meta.<a name="line.2281"></a> -<span class="sourceLineNo">2282</span> Get get = new Get(hi.getRegionName());<a name="line.2282"></a> -<span class="sourceLineNo">2283</span> get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);<a name="line.2283"></a> -<span class="sourceLineNo">2284</span> get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);<a name="line.2284"></a> -<span class="sourceLineNo">2285</span> get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);<a name="line.2285"></a> -<span class="sourceLineNo">2286</span> // also get the locations of the replicas to close if the primary region is being closed<a name="line.2286"></a> -<span class="sourceLineNo">2287</span> if (hi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {<a name="line.2287"></a> -<span class="sourceLineNo">2288</span> int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();<a name="line.2288"></a> -<span class="sourceLineNo">2289</span> for (int i = 0; i < numReplicas; i++) {<a name="line.2289"></a> -<span class="sourceLineNo">2290</span> get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));<a name="line.2290"></a> -<span class="sourceLineNo">2291</span> get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));<a name="line.2291"></a> -<span class="sourceLineNo">2292</span> }<a name="line.2292"></a> -<span class="sourceLineNo">2293</span> }<a name="line.2293"></a> -<span class="sourceLineNo">2294</span> Result r = meta.get(get);<a name="line.2294"></a> -<span class="sourceLineNo">2295</span> RegionLocations rl = MetaTableAccessor.getRegionLocations(r);<a name="line.2295"></a> -<span class="sourceLineNo">2296</span> if (rl == null) {<a name="line.2296"></a> -<span class="sourceLineNo">2297</span> LOG.warn("Unable to close region " + hi.getRegionNameAsString() +<a name="line.2297"></a> -<span class="sourceLineNo">2298</span> " since meta does not have handle to reach it");<a name="line.2298"></a> -<span class="sourceLineNo">2299</span> return;<a name="line.2299"></a> -<span class="sourceLineNo">2300</span> }<a name="line.2300"></a> -<span class="sourceLineNo">2301</span> for (HRegionLocation h : rl.getRegionLocations()) {<a name="line.2301"></a> -<span class="sourceLineNo">2302</span> ServerName serverName = h.getServerName();<a name="line.2302"></a> -<span class="sourceLineNo">2303</span> if (serverName == null) {<a name="line.2303"></a> -<span class="sourceLineNo">2304</span> errors.reportError("Unable to close region "<a name="line.2304"></a> -<span class="sourceLineNo">2305</span> + hi.getRegionNameAsString() + " because meta does not "<a name="line.2305"></a> -<span class="sourceLineNo">2306</span> + "have handle to reach it.");<a name="line.2306"></a> -<span class="sourceLineNo">2307</span> continue;<a name="line.2307"></a> -<span class="sourceLineNo">2308</span> }<a name="line.2308"></a> -<span class="sourceLineNo">2309</span> RegionInfo hri = h.getRegionInfo();<a name="line.2309"></a> -<span class="sourceLineNo">2310</span> if (hri == null) {<a name="line.2310"></a> -<span class="sourceLineNo">2311</span> LOG.warn("Unable to close region " + hi.getRegionNameAsString()<a name="line.2311"></a> -<span class="sourceLineNo">2312</span> + " because hbase:meta had invalid or missing "<a name="line.2312"></a> -<span class="sourceLineNo">2313</span> + HConstants.CATALOG_FAMILY_STR + ":"<a name="line.2313"></a> -<span class="sourceLineNo">2314</span> + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)<a name="line.2314"></a> -<span class="sourceLineNo">2315</span> + " qualifier value.");<a name="line.2315"></a> -<span class="sourceLineNo">2316</span> continue;<a name="line.2316"></a> -<span class="sourceLineNo">2317</span> }<a name="line.2317"></a> -<span class="sourceLineNo">2318</span> // close the region -- close files and remove assignment<a name="line.2318"></a> -<span class="sourceLineNo">2319</span> HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);<a name="line.2319"></a> -<span class="sourceLineNo">2320</span> }<a name="line.2320"></a> -<span class="sourceLineNo">2321</span> }<a name="line.2321"></a> -<span class="sourceLineNo">2322</span><a name="line.2322"></a> -<span class="sourceLineNo">2323</span> private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,<a name="line.2323"></a> -<span class="sourceLineNo">2324</span> KeeperException, InterruptedException {<a name="line.2324"></a> -<span class="sourceLineNo">2325</span> // If we are trying to fix the errors<a name="line.2325"></a> -<span class="sourceLineNo">2326</span> if (shouldFixAssignments()) {<a name="line.2326"></a> -<span class="sourceLineNo">2327</span> errors.print(msg);<a name="line.2327"></a> -<span class="sourceLineNo">2328</span> undeployRegions(hbi);<a name="line.2328"></a> -<span class="sourceLineNo">2329</span> setShouldRerun();<a name="line.2329"></a> -<span class="sourceLineNo">2330</span> RegionInfo hri = hbi.getHdfsHRI();<a name="line.2330"></a> -<span class="sourceLineNo">2331</span> if (hri == null) {<a name="line.2331"></a> -<span class="sourceLineNo">2332</span> hri = hbi.metaEntry;<a name="line.2332"></a> -<span class="sourceLineNo">2333</span> }<a name="line.2333"></a> -<span class="sourceLineNo">2334</span> HBaseFsckRepair.fixUnassigned(admin, hri);<a name="line.2334"></a> -<span class="sourceLineNo">2335</span> HBaseFsckRepair.waitUntilAssigned(admin, hri);<a name="line.2335"></a> -<span class="sourceLineNo">2336</span><a name="line.2336"></a> -<span class="sourceLineNo">2337</span> // also assign replicas if needed (do it only when this call operates on a primary replica)<a name="line.2337"></a> -<span class="sourceLineNo">2338</span> if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) return;<a name="line.2338"></a> -<span class="sourceLineNo">2339</span> int replicationCount = admin.getTableDescriptor(hri.getTable()).getRegionReplication();<a name="line.2339"></a> -<span class="sourceLineNo">2340</span> for (int i = 1; i < replicationCount; i++) {<a name="line.2340"></a> -<span class="sourceLineNo">2341</span> hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);<a name="line.2341"></a> -<span class="sourceLineNo">2342</span> HbckInfo h = regionInfoMap.get(hri.getEncodedName());<a name="line.2342"></a> -<span class="sourceLineNo">2343</span> if (h != null) {<a name="line.2343"></a> -<span class="sourceLineNo">2344</span> undeployRegions(h);<a name="line.2344"></a> -<span class="sourceLineNo">2345</span> //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore<a name="line.2345"></a> -<span class="sourceLineNo">2346</span> //in consistency checks<a name="line.2346"></a> -<span class="sourceLineNo">2347</span> h.setSkipChecks(true);<a name="line.2347"></a> -<span class="sourceLineNo">2348</span> }<a name="line.2348"></a> -<span class="sourceLineNo">2349</span> HBaseFsckRepair.fixUnassigned(admin, hri);<a name="line.2349"></a> -<span class="sourceLineNo">2350</span> HBaseFsckRepair.waitUntilAssigned(admin, hri);<a name="line.2350"></a> -<span class="sourceLineNo">2351</span> }<a name="line.2351"></a> -<span class="sourceLineNo">2352</span><a name="line.2352"></a> -<span class="sourceLineNo">2353</span> }<a name="line.2353"></a> -<span class="sourceLineNo">2354</span> }<a name="line.2354"></a> -<span class="sourceLineNo">2355</span><a name="line.2355"></a> -<span class="sourceLineNo">2356</span> /**<a name="line.2356"></a> -<span class="sourceLineNo">2357</span> * Check a single region for consistency and correct deployment.<a name="line.2357"></a> -<span class="sourceLineNo">2358</span> */<a name="line.2358"></a> -<span class="sourceLineNo">2359</span> private void checkRegionConsistency(final String key, final HbckInfo hbi)<a name="line.2359"></a> -<span class="sourceLineNo">2360</span> throws IOException, KeeperException, InterruptedException {<a name="line.2360"></a> -<span class="sourceLineNo">2361</span><a name="line.2361"></a> -<span class="sourceLineNo">2362</span> if (hbi.isSkipChecks()) return;<a name="line.2362"></a> -<span class="sourceLineNo">2363</span> String descriptiveName = hbi.toString();<a name="line.2363"></a> -<span class="sourceLineNo">2364</span> boolean inMeta = hbi.metaEntry != null;<a name="line.2364"></a> -<span class="sourceLineNo">2365</span> // In case not checking HDFS, assume the region is on HDFS<a name="line.2365"></a> -<span class="sourceLineNo">2366</span> boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;<a name="line.2366"></a> -<span class="sourceLineNo">2367</span> boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;<a name="line.2367"></a> -<span class="sourceLineNo">2368</span> boolean isDeployed = !hbi.deployedOn.isEmpty();<a name="line.2368"></a> -<span class="sourceLineNo">2369</span> boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;<a name="line.2369"></a> -<span class="sourceLineNo">2370</span> boolean deploymentMatchesMeta =<a name="line.2370"></a> -<span class="sourceLineNo">2371</span> hasMetaAssignment && isDeployed && !isMultiplyDeployed &&<a name="line.2371"></a> -<span class="sourceLineNo">2372</span> hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));<a name="line.2372"></a> -<span class="sourceLineNo">2373</span> boolean splitParent =<a name="line.2373"></a> -<span class="sourceLineNo">2374</span> inMeta && hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();<a name="line.2374"></a> -<span class="sourceLineNo">2375</span> boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry.getTable());<a name="line.2375"></a> -<span class="sourceLineNo">2376</span> boolean recentlyModified = inHdfs &&<a name="line.2376"></a> -<span class="sourceLineNo">2377</span> hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();<a name="line.2377"></a> -<span class="sourceLineNo">2378</span><a name="line.2378"></a> -<span class="sourceLineNo">2379</span> // ========== First the healthy cases =============<a name="line.2379"></a> -<span class="sourceLineNo">2380</span> if (hbi.containsOnlyHdfsEdits()) {<a name="line.2380"></a> -<span class="sourceLineNo">2381</span> return;<a name="line.2381"></a> -<span class="sourceLineNo">2382</span> }<a name="line.2382"></a> -<span class="sourceLineNo">2383</span> if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {<a name="line.2383"></a> -<span class="sourceLineNo">2384</span> return;<a name="line.2384"></a> -<span class="sourceLineNo">2385</span> } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {<a name="line.2385"></a> -<span class="sourceLineNo">2386</span> LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +<a name="line.2386"></a> -<span class="sourceLineNo">2387</span> "tabled that is not deployed");<a name="line.2387"></a> -<span class="sourceLineNo">2388</span> return;<a name="line.2388"></a> -<span class="sourceLineNo">2389</span> } else if (recentlyModified) {<a name="line.2389"></a> -<span class="sourceLineNo">2390</span> LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");<a name="line.2390"></a> -<span class="sourceLineNo">2391</span> return;<a name="line.2391"></a> -<span class="sourceLineNo">2392</span> }<a name="line.2392"></a> -<span class="sourceLineNo">2393</span> // ========== Cases where the region is not in hbase:meta =============<a name="line.2393"></a> -<span class="sourceLineNo">2394</span> else if (!inMeta && !inHdfs && !isDeployed) {<a name="line.2394"></a> -<span class="sourceLineNo">2395</span> // We shouldn't have record of this region at all then!<a name="line.2395"></a> -<span class="sourceLineNo">2396</span> assert false : "Entry for region with no data";<a name="line.2396"></a> -<span class="sourceLineNo">2397</span> } else if (!inMeta && !inHdfs && isDeployed) {<a name="line.2397"></a> -<span class="sourceLineNo">2398</span> errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "<a name="line.2398"></a> -<span class="sourceLineNo">2399</span> + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +<a name="line.2399"></a> -<span class="sourceLineNo">2400</span> "deployed on " + Joiner.on(", ").join(hbi.deployedOn));<a name="line.2400"></a> -<span class="sourceLineNo">2401</span> if (shouldFixAssignments()) {<a name="line.2401"></a> -<span class="sourceLineNo">2402</span> undeployRegions(hbi);<a name="line.2402"></a> -<span class="sourceLineNo">2403</span> }<a name="line.2403"></a> -<span class="sourceLineNo">2404</span><a name="line.2404"></a> -<span class="sourceLineNo">2405</span> } else if (!inMeta && inHdfs && !isDeployed) {<a name="line.2405"></a> -<span class="sourceLineNo">2406</span> if (hbi.isMerged()) {<a name="line.2406"></a> -<span class="sourceLineNo">2407</span> // This region has already been merged, the remaining hdfs file will be<a name="line.2407"></a> -<span class="sourceLineNo">2408</span> // cleaned by CatalogJanitor later<a name="line.2408"></a> -<span class="sourceLineNo">2409</span> hbi.setSkipChecks(true);<a name="line.2409"></a> -<span class="sourceLineNo">2410</span> LOG.info("Region " + descriptiveName<a name="line.2410"></a> -<span class="sourceLineNo">2411</span> + " got merge recently, its file(s) will be cleaned by CatalogJanitor later");<a name="line.2411"></a> -<span class="sourceLineNo">2412</span> return;<a name="line.2412"></a> -<span class="sourceLineNo">2413</span> }<a name="line.2413"></a> -<span class="sourceLineNo">2414</span> errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "<a name="line.2414"></a> -<span class="sourceLineNo">2415</span> + descriptiveName + " on HDFS, but not listed in hbase:meta " +<a name="line.2415"></a> -<span class="sourceLineNo">2416</span> "or deployed on any region server");<a name="line.2416"></a> -<span class="sourceLineNo">2417</span> // restore region consistency of an adopted orphan<a name="line.2417"></a> -<span class="sourceLineNo">2418</span> if (shouldFixMeta()) {<a name="line.2418"></a> -<span class="sourceLineNo">2419</span> if (!hbi.isHdfsRegioninfoPresent()) {<a name="line.2419"></a> -<span class="sourceLineNo">2420</span> LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"<a name="line.2420"></a> -<span class="sourceLineNo">2421</span> + " in table integrity repair phase if -fixHdfsOrphans was" +<a name="line.2421"></a> -<span class="sourceLineNo">2422</span> " used.");<a name="line.2422"></a> -<span class="sourceLineNo">2423</span> return;<a name="line.2423"></a> -<span class="sourceLineNo">2424</span> }<a name="line.2424"></a> -<span class="sourceLineNo">2425</span><a name="line.2425"></a> -<span class="sourceLineNo">2426</span> RegionInfo hri = hbi.getHdfsHRI();<a name="line.2426"></a> -<span class="sourceLineNo">2427</span> TableInfo tableInfo = tablesInfo.get(hri.getTable());<a name="line.2427"></a> -<span class="sourceLineNo">2428</span><a name="line.2428"></a> -<span class="sourceLineNo">2429</span> for (RegionInfo region : tableInfo.getRegionsFromMeta()) {<a name="line.2429"></a> -<span class="sourceLineNo">2430</span> if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0<a name="line.2430"></a> -<span class="sourceLineNo">2431</span> && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),<a name="line.2431"></a> -<span class="sourceLineNo">2432</span> hri.getEndKey()) >= 0)<a name="line.2432"></a> -<span class="sourceLineNo">2433</span> && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {<a name="line.2433"></a> -<span class="sourceLineNo">2434</span> if(region.isSplit() || region.isOffline()) continue;<a name="line.2434"></a> -<span class="sourceLineNo">2435</span> Path regionDir = hbi.getHdfsRegionDir();<a name="line.2435"></a> -<span class="sourceLineNo">2436</span> FileSystem fs = regionDir.getFileSystem(getConf());<a name="line.2436"></a> -<span class="sourceLineNo">2437</span> List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);<a name="line.2437"></a> -<span class="sourceLineNo">2438</span> for (Path familyDir : familyDirs) {<a name="line.2438"></a> -<span class="sourceLineNo">2439</span> List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);<a name="line.2439"></a> -<span class="sourceLineNo">2440</span> for (Path referenceFilePath : referenceFilePaths) {<a name="line.2440"></a> -<span class="sourceLineNo">2441</span> Path parentRegionDir =<a name="line.2441"></a> -<span class="sourceLineNo">2442</span> StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();<a name="line.2442"></a> -<span class="sourceLineNo">2443</span> if (parentRegionDir.toString().endsWith(region.getEncodedName())) {<a name="line.2443"></a> -<span class="sourceLineNo">2444</span> LOG.warn(hri + " start and stop keys are in the range of " + region<a name="line.2444"></a> -<span class="sourceLineNo">2445</span> + ". The region might not be cleaned up from hdfs when region " + region<a name="line.2445"></a> -<span class="sourceLineNo">2446</span> + " split failed. Hence deleting from hdfs.");<a name="line.2446"></a> -<span class="sourceLineNo">2447</span> HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,<a name="line.2447"></a> -<span class="sourceLineNo">2448</span> regionDir.getParent(), hri);<a name="line.2448"></a> -<span class="sourceLineNo">2449</span> return;<a name="line.2449"></a> -<span class="sourceLineNo">2450</span> }<a name="line.2450"></a> -<span class="sourceLineNo">2451</span> }<a name="line.2451"></a> -<span class="sourceLineNo">2452</span> }<a name="line.2452"></a> -<span class="sourceLineNo">2453</span> }<a name="line.2453"></a> -<span class="sourceLineNo">2454</span> }<a name="line.2454"></a> -<span class="sourceLineNo">2455</span> LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());<a name="line.2455"></a> -<span class="sourceLineNo">2456</span> int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();<a name="line.2456"></a> -<span class="sourceLineNo">2457</span> HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),<a name="line.2457"></a> -<span class="sourceLineNo">2458</span> admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))<a name="line.2458"></a> -<span class="sourceLineNo">2459</span> .getLiveServerMetrics().keySet(), numReplicas);<a name="line.2459"></a> -<span class="sourceLineNo">2460</span><a name="line.2460"></a> -<span class="sourceLineNo">2461</span> tryAssignmentRepair(hbi, "Trying to reassign region...");<a name="line.2461"></a> -<span class="sourceLineNo">2462</span> }<a name="line.2462"></a> -<span class="sourceLineNo">2463</span><a name="line.2463"></a> -<span class="sourceLineNo">2464</span> } else if (!inMeta && inHdfs && isDeployed) {<a name="line.2464"></a> -<span class="sourceLineNo">2465</span> errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName<a name="line.2465"></a> -<span class="sourceLineNo">2466</span> + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));<a name="line.2466"></a> -<span class="sourceLineNo">2467</span> debugLsr(hbi.getHdfsRegionDir());<a name="line.2467"></a> -<span class="sourceLineNo">2468</span> if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {<a name="line.2468"></a> -<span class="sourceLineNo">2469</span> // for replicas, this means that we should undeploy the region (we would have<a name="line.2469"></a> -<span class="sourceLineNo">2470</span> // gone over the primaries and fixed meta holes in first phase under<a name="line.2470"></a> -<span class="sourceLineNo">2471</span> // checkAndFixConsistency; we shouldn't get the condition !inMeta at<a name="line.2471"></a> -<span class="sourceLineNo">2472</span> // this stage unless unwanted replica)<a name="line.2472"></a> -<span class="sourceLineNo">2473</span> if (shouldFixAssignments()) {<a name="line.2473"></a> -<span class="sourceLineNo">2474</span> undeployRegionsForHbi(hbi);<a name="line.2474"></a> -<span class="sourceLineNo">2475</span> }<a name="line.2475"></a> -<span class="sourceLineNo">2476</span> }<a name="line.2476"></a> -<span class="sourceLineNo">2477</span> if (shouldFixMeta() && hbi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {<a name="line.2477"></a> -<span class="sourceLineNo">2478</span> if (!hbi.isHdfsRegioninfoPresent()) {<a name="line.2478"></a> -<span class="sourceLineNo">2479</span> LOG.error("This should have been repaired in table integrity repair phase");<a name="line.2479"></a> -<span class="sourceLineNo">2480</span> return;<a name="line.2480"></a> -<span class="sourceLineNo">2481</span> }<a name="line.2481"></a> -<span class="sourceLineNo">2482</span><a name="line.2482"></a> -<span class="sourceLineNo">2483</span> LOG.info("Patching hbase:meta with with .regioninfo: " + hbi.getHdfsHRI());<a name="line.2483"></a> -<span class="sourceLineNo">2484</span> int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();<a name="line.2484"></a> -<span class="sourceLineNo">2485</span> HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),<a name="line.2485"></a> -<span class="sourceLineNo">2486</span> admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))<a name="line.2486"></a> -<span class="sourceLineNo">2487</span> .getLiveServerMetrics().keySet(), numReplicas);<a name="line.2487"></a> -<span class="sourceLineNo">2488</span> tryAssignmentRepair(hbi, "Trying to fix unassigned region...");<a name="line.2488"></a> -<span class="sourceLineNo">2489</span> }<a name="line.2489"></a> -<span class="sourceLineNo">2490</span><a name="line.2490"></a> -<span class="sourceLineNo">2491</span> // ========== Cases where the region is in hbase:meta =============<a name="line.2491"></a> -<span class="sourceLineNo">2492</span> } else if (inMeta && inHdfs && !isDeployed && splitParent) {<a name="line.2492"></a> -<span class="sourceLineNo">2493</span> // check whether this is an actual error, or just transient state where parent<a name="line.2493"></a> -<span class="sourceLineNo">2494</span> // is not cleaned<a name="line.2494"></a> -<span class="sourceLineNo">2495</span> if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {<a name="line.2495"></a> -<span class="sourceLineNo">2496</span> // check that split daughters are there<a name="line.2496"></a> -<span class="sourceLineNo">2497</span> HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());<a name="line.2497"></a> -<span class="sourceLineNo">2498</span> HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());<a name="line.2498"></a> -<span class="sourceLineNo">2499</span> if (infoA != null && infoB != null) {<a name="line.2499"></a> -<span class="sourceLineNo">2500</span> // we already processed or will process daughters. Move on, nothing to see here.<a name="line.2500"></a> -<span class="sourceLineNo">2501</span> hbi.setSkipChecks(true);<a name="line.2501"></a> -<span class="sourceLineNo">2502</span> return;<a name="line.2502"></a> -<span class="sourceLineNo">2503</span> }<a name="line.2503"></a> -<span class="sourceLineNo">2504</span> }<a name="line.2504"></a> -<span class="sourceLineNo">2505</span><a name="line.2505"></a> -<span class="sourceLineNo">2506</span> // For Replica region, we need to do a similar check. If replica is not split successfully,<a name="line.2506"></a> -<span class="sourceLineNo">2507</span> // error is going to be reported against primary daughter region.<a name="line.2507"></a> -<span class="sourceLineNo">2508</span> if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {<a name="line.2508"></a> -<span class="sourceLineNo">2509</span> LOG.info("Region " + descriptiveName + " is a split parent in META, in HDFS, "<a name="line.2509"></a> -<span class="sourceLineNo">2510</span> + "and not deployed on any region server. This may be transient.");<a name="line.2510"></a> -<span class="sourceLineNo">2511</span> hbi.setSkipChecks(true);<a name="line.2511"></a> -<span class="sourceLineNo">2512</span> return;<a name="line.2512"></a> -<span class="sourceLineNo">2513</span> }<a name="line.2513"></a> -<span class="sourceLineNo">2514</span><a name="line.2514"></a> -<span class="sourceLineNo">2515</span> errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "<a name="line.2515"></a> -<span class="sourceLineNo">2516</span> + descriptiveName + " is a split parent in META, in HDFS, "<a name="line.2516"></a> -<span class="sourceLineNo">2517</span> + "and not deployed on any region server. This could be transient, "<a name="line.2517"></a> -<span class="sourceLineNo">2518</span> + "consider to run the catalog janitor first!");<a name="line.2518"></a> -<span class="sourceLineNo">2519</span> if (shouldFixSplitParents()) {<a name="line.2519"></a> -<span class="sourceLineNo">2520</span> setShouldRerun();<a name="line.2520"></a> -<span class="sourceLineNo">2521</span> resetSplitParent(hbi);<a name="line.2521"></a> -<span class="sourceLineNo">2522</span> }<a name="line.2522"></a> -<span class="sourceLineNo">2523</span> } else if (inMeta && !inHdfs && !isDeployed) {<a name="line.2523"></a> -<span class="sourceLineNo">2524</span> errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "<a name="line.2524"></a> -<span class="sourceLineNo">2525</span> + descriptiveName + " found in META, but not in HDFS "<a name="line.2525"></a> -<span class="sourceLineNo">2526</span> + "or deployed on any region server.");<a name="line.2526"></a> -<span class="sourceLineNo">2527</span> if (shouldFixMeta()) {<a name="line.2527"></a> -<span class="sourceLineNo">2528</span> deleteMetaRegion(hbi);<a name="line.2528"></a> -<span class="sourceLineNo">2529</span> }<a name="line.2529"></a> -<span class="sourceLineNo">2530</span> } else if (inMeta && !inHdfs && isDeployed) {<a name="line.2530"></a> -<span class="sourceLineNo">2531</span> errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName<a name="line.2531"></a> -<span class="sourceLineNo">2532</span> + " found in META, but not in HDFS, " +<a name="line.2532"></a> -<span class="sourceLineNo">2533</span> "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));<a name="line.2533"></a> -<span class="sourceLineNo">2534</span> // We treat HDFS as ground truth. Any information in meta is transient<a name="line.2534"></a> -<span class="sourceLineNo">2535</span> // and equivalent data can be regenerated. So, lets unassign and remove<a name="line.2535"></a> -<span class="sourceLineNo">2536</span> // these problems from META.<a name="line.2536"></a> -<span class="sourceLineNo">2537</span> if (shouldFixAssignments()) {<a name="line.2537"></a> -<span class="sourceLineNo">2538</span> errors.print("Trying to fix unassigned region...");<a name="line.2538"></a> -<span class="sourceLineNo">2539</span> undeployRegions(hbi);<a name="line.2539"></a> -<span class="sourceLineNo">2540</span> }<a name="line.2540"></a> -<span class="sourceLineNo">2541</span> if (shouldFixMeta()) {<a name="line.2541"></a> -<span class="sourceLineNo">2542</span> // wait for it to complete<a name="line.2542"></a> -<span class="sourceLineNo">2543</span> deleteMetaRegion(hbi);<a name="line.2543"></a> -<span class="sourceLineNo">2544</span> }<a name="line.2544"></a> -<span class="sourceLineNo">2545</span> } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {<a name="line.2545"></a> -<span class="sourceLineNo">2546</span> errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName<a name="line.2546"></a> -<span class="sourceLineNo">2547</span> + " not deployed on any region server.");<a name="line.2547"></a> -<span class="sourceLineNo">2548</span> tryAssignmentRepair(hbi, "Trying to fix unassigned region...");<a name="line.2548"></a> -<span class="sourceLineNo">2549</span> } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {<a name="line.2549"></a> -<span class="sourceLineNo">2550</span> errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,<a name="line.2550"></a> -<span class="sourceLineNo">2551</span> "Region " + descriptiveName + " should not be deployed according " +<a name="line.2551"></a> -<span class="sourceLineNo">2552</span> "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));<a name="line.2552"></a> -<span class="sourceLineNo">2553</span> if (shouldFixAssignments()) {<a name="line.2553"></a> -<span class="sourceLineNo">2554</span> errors.print("Trying to close the region " + descriptiveName);<a name="line.2554"></a> -<span class="sourceLineNo">2555</span> setShouldRerun();<a name="line.2555"></a> -<span class="sourceLineNo">2556</span> HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);<a name="line.2556"></a> -<span class="sourceLineNo">2557</span> }<a name="line.2557"></a> -<span class="sourceLineNo">2558</span> } else if (inMeta && inHdfs && isMultiplyDeployed) {<a name="line.2558"></a> -<span class="sourceLineNo">2559</span> errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName<a name="line.2559"></a> -<span class="sourceLineNo">2560</span> + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer<a name="line.2560"></a> -<span class="sourceLineNo">2561</span> + " but is multiply assigned to region servers " +<a name="line.2561"></a> -<span class="sourceLineNo">2562</span> Joiner.on(", ").join(hbi.deployedOn));<a name="line.2562"></a> -<span class="sourceLineNo">2563</span> // If we are trying to fix the errors<a name="line.2563"></a> -<span class="sourceLineNo">2564</span> if (shouldFixAssignments()) {<a name="line.2564"></a> -<span class="sourceLineNo">2565</span> errors.print("Trying to fix assignment error...");<a name="line.2565"></a> -<span class="sourceLineNo">2566</span> setShouldRerun();<a name="line.2566"></a> -<span class="sourceLineNo">2567</span> HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);<a name="line.2567"></a> -<span class="sourceLineNo">2568</span> }<a name="line.2568"></a> -<span class="sourceLineNo">2569</span> } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {<a name="line.2569"></a> -<span class="sourceLineNo">2570</span> errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "<a name="line.2570"></a> -<span class="sourceLineNo">2571</span> + descriptiveName + " listed in hbase:meta on region server " +<a name="line.2571"></a> -<span class="sourceLineNo">2572</span> hbi.metaEntry.regionServer + " but found on region server " +<a name="line.2572"></a> -<span class="sourceLineNo">2573</span> hbi.deployedOn.get(0));<a name="line.2573"></a> -<span class="sourceLineNo">2574</span> // If we are trying to fix the errors<a name="line.2574"></a> -<span class="sourceLineNo">2575</span> if (shouldFixAssignments()) {<a name="line.2575"></a> -<span class="sourceLineNo">2576</span> errors.print("Trying to fix assignment error...");<a name="line.2576"></a> -<span class="sourceLineNo">2577</span> setShouldRerun();<a name="line.2577"></a> -<span class="sourceLineNo">2578</span> HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);<a name="line.2578"></a> -<span class="sourceLineNo">2579</span> HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());<a name="line.2579"></a> -<span class="sourceLineNo">2580</span> }<a name="line.2580"></a> -<span class="sourceLineNo">2581</span> } else {<a name="line.2581"></a> -<span class="sourceLineNo">2582</span> errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +<a name="line.2582"></a> -<span class="sourceLineNo">2583</span> " is in an unforeseen state:" +<a name="line.2583"></a> -<span class="sourceLineNo">2584</span> " inMeta=" + inMeta +<a name="line.2584"></a> -<span class="sourceLineNo">2585</span> " inHdfs=" + inHdfs +<a name="line.2585"></a> -<span class="sourceLineNo">2586</span> " isDeployed=" + isDeployed +<a name="line.2586"></a> -<span class="sourceLineNo">2587</span> " isMultiplyDeployed=" + isMultiplyDeployed +<a name="line.2587"></a> -<span class="sourceLineNo">2588</span> " deploymentMatchesMeta=" + deploymentMatchesMeta +<a name="line.2588"></a> -<span class="sourceLineNo">2589</span> " shouldBeDeployed=" + shouldBeDeployed);<a name="line.2589"></a> -<span class="sourceLineNo">2590</span> }<a name="line.2590"></a> -<span class="sourceLineNo">2591</span> }<a name="line.2591"></a> -<span class="sourceLineNo">2592</span><a name="line.2592"></a> -<span class="sourceLineNo">2593</span> /**<a name="line.2593"></a> -<span class="sourceLineNo">2594</span> * Checks tables integrity. Goes over all regions and scans the tables.<a name="line.2594"></a> -<span class="sourceLineNo">2595</span> * Collects all the pieces for each table and checks if there are missing,<a name="line.2595"></a> -<span class="sourceLineNo">2596</span> * repeated or overlapping ones.<a name="line.2596"></a> -<span class="sourceLineNo">2597</span> * @throws IOException<a name="line.2597"></a> -<span class="sourceLineNo">2598</span> */<a name="line.2598"></a> -<span class="sourceLineNo">2599</span> SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {<a name="line.2599"></a> -<span class="sourceLineNo">2600</span> tablesInfo = new TreeMap<>();<a name="line.2600"></a> -<span class="sourceLineNo">2601</span> LOG.debug("There are " + regionInfoMap.size() + " region info entries");<a name="line.2601"></a> -<span class="sourceLineNo">2602</span> for (HbckInfo hbi : regionInfoMap.values()) {<a name="line.2602"></a> -<span class="sourceLineNo">2603</span> // Check only valid, working regions<a name="line.2603"></a> -<span class="sourceLineNo">2604</span> if (hbi.metaEntry == null) {<a name="line.2604"></a> -<span class="sourceLineNo">2605</span> // this assumes that consistency check has run loadMetaEntry<a name="line.2605"></a> -<span class="sourceLineNo">2606</span> Path p = hbi.getHdfsRegionDir();<a name="line.2606"></a> -<span class="sourceLineNo">2607</span> if (p == null) {<a name="line.2607"></a> -<span class="sourceLineNo">2608</span> errors.report("No regioninfo in Meta or HDFS. " + hbi);<a name="line.2608"></a> -<span class="sourceLineNo">2609</span> }<a name="line.2609"></a> -<span class="sourceLineNo">2610</span><a name="line.2610"></a> -<span class="sourceLineNo">2611</span> // TODO test.<a name="line.2611"></a> -<span class="sourceLineNo">2612</span> continue;<a name="line.2612"></a> -<span class="sourceLineNo">2613</span> }<a name="line.2613"></a> -<span class="sourceLineNo">2614</span> if (hbi.metaEntry.regionServer == null) {<a name="line.2614"></a> -<span class="sourceLineNo">2615</span> errors.detail("Skipping region because no region server: " + hbi);<a name="line.2615"></a> -<span class="sourceLineNo">2616</span> continue;<a name="line.2616"></a> -<span class="sourceLineNo">2617</span> }<a name="line.2617"></a> -<span class="sourceLineNo">2618</span> if (hbi.metaEntry.isOffline()) {<a name="line.2618"></a> -<span class="sourceLineNo">2619</span> errors.detail("Skipping region because it is offline: " + hbi);<a name="line.2619"></a> -<span class="sourceLineNo">2620</span> continue;<a name="line.2620"></a> -<span class="sourceLineNo">2621</span> }<a name="line.2621"></a> -<span class="sourceLineNo">2622</span> if (hbi.containsOnlyHdfsEdits()) {<a name="line.2622"></a> -<span class="sourceLineNo">2623</span> errors.detail("Skipping region because it only contains edits" + hbi);<a name="line.2623"></a> -<span class="sourceLineNo">2624</span> continue;<a name="line.2624"></a> -<span class="sourceLineNo">2625</span> }<a name="line.2625"></a> -<span class="sourceLineNo">2626</span><a name="line.2626"></a> -<span class="sourceLineNo">2627</span> // Missing regionDir or over-deployment is checked elsewhere. Include<a name="line.2627"></a> -<span class="sourceLineNo">2628</span> // these cases in modTInfo, so we can evaluate those regions as part of<a name="line.2628"></a> -<span class="sourceLineNo">2629</span> // the region chain in META<a name="line.2629"></a> -<span class="sourceLineNo">2630</span> //if (hbi.foundRegionDir == null) continue;<a name="line.2630"></a> -<span class="sourceLineNo">2631</span> //if (hbi.deployedOn.size() != 1) continue;<a name="line.2631"></a> -<span class="sourceLineNo">2632</span> if (hbi.deployedOn.isEmpty()) continue;<a name="line.2632"></a> -<span class="sourceLineNo">2633</span><a name="line.2633"></a> -<span class="sourceLineNo">2634</span> // We should be safe here<a name="line.2634"></a> -<span class="sourceLineNo">2635</span> TableName tableName = hbi.metaEntry.getTable();<a name="line.2635"></a> -<span class="sourceLineNo">2636</span> TableInfo modTInfo = tablesInfo.get(tableName);<a name="line.2636"></a> -<span class="sourceLineNo">2637</span> if (modTInfo == null) {<a name="line.2637"></a> -<span class="sourceLineNo">2638</span> modTInfo = new TableInfo(tableName);<a name="line.2638"></a> -<span class="sourceLineNo">2639</span> }<a name="line.2639"></a> -<span class="sourceLineNo">2640</span> for (ServerName server : hbi.deployedOn) {<a name="line.2640"></a> -<span class="sourceLineNo">2641</span> modTInfo.addServer(server);<a name="line.2641"></a> -<span class="sourceLineNo">2642</span> }<a name="line.2642"></a> -<span class="sourceLineNo">2643</span><a name="line.2643"></a> -<span class="sourceLineNo">2644</span> if (!hbi.isSkipChecks()) {<a name="line.2644"></a> -<span class="sourceLineNo">2645</span> modTInfo.addRegionInfo(hbi);<a name="line.2645"></a> -<span class="sourceLineNo">2646</span> }<a name="line.2646"></a> -<span class="sourceLineNo">2647</span><a name="line.2647"></a> -<span class="sourceLineNo">2648</span> tablesInfo.put(tableName, modTInfo);<a name="line.2648"></a> -<span class="sourceLineNo">2649</span> }<a name="line.2649"></a> -<span class="sourceLineNo">2650</span><a name="line.2650"></a> -<span class="sourceLineNo">2651</span> loadTableInfosForTablesWithNoRegion();<a name="line.2651"></a> +<span class="sourceLineNo">2108</span> } else if (!checkMetaOnly) {<a name="line.2108"></a> +<span class="sourceLineNo">2109</span> // dangling table state in meta if checkMetaOnly is false. If checkMetaOnly is<a name="line.2109"></a> +<span class="sourceLineNo">2110</span> // true, tableInfo will be null as tablesInfo are not polulated for all tables from hdfs<a name="line.2110"></a> +<span class="sourceLineNo">2111</span> errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,<a name="line.2111"></a> +<span class="sourceLineNo">2112</span> tableName + " has dangling table state " + tableState);<a name="line.2112"></a> +<span class="sourceLineNo">2113</span> }<a name="line.2113"></a> +<span class="sourceLineNo">2114</span> }<a name="line.2114"></a> +<span class="sourceLineNo">2115</span> }<a name="line.2115"></a> +<span class="sourceLineNo">2116</span> // check that all tables have states<a name="line.2116"></a> +<span class="sourceLineNo">2117</span> for (TableName tableName : tablesInfo.keySet()) {<a name="line.2117"></a> +<span class="sourceLineNo">2118</span> if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) {<a name="line.2118"></a> +<span class="sourceLineNo">2119</span> if (fixMeta) {<a name="line.2119"></a> +<span class="sourceLineNo">2120</span> MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);<a name="line.2120"></a> +<span class="sourceLineNo">2121</span> TableState newState = MetaTableAccessor.getTableState(connection, tableName);<a name="line.2121"></a> +<span class="sourceLineNo">2122</span> if (newState == null) {<a name="line.2122"></a> +<span class="sourceLineNo">2123</span> errors.reportError(ERROR_CODE.NO_TABLE_STATE,<a name="line.2123"></a> +<span class="sourceLineNo">2124</span> "Unable to change state for table " + tableName + " in meta ");<a name="line.2124"></a> +<span class="sourceLineNo">2125</span> }<a name="line.2125"></a> +<span class="sourceLineNo">2126</span> } else {<a name="line.2126"></a> +<span class="sourceLineNo">2127</span> errors.reportError(ERROR_CODE.NO_TABLE_STATE,<a name="line.2127"></a> +<span class="sourceLineNo">2128</span> tableName + " has no state in meta ");<a name="line.2128"></a> +<span class="sourceLineNo">2129</span> }<a name="line.2129"></a> +<span class="sourceLineNo">2130</span> }<a name="line.2130"></a> +<span class="sourceLineNo">2131</span> }<a name="line.2131"></a> +<span class="sourceLineNo">2132</span> }<a name="line.2132"></a> +<span class="sourceLineNo">2133</span><a name="line.2133"></a> +<span class="sourceLineNo">2134</span> private void preCheckPermission() throws IOException, AccessDeniedException {<a name="line.2134"></a> +<span class="sourceLineNo">2135</span> if (shouldIgnorePreCheckPermission()) {<a name="line.2135"></a> +<span class="sourceLineNo">2136</span> return;<a name="line.2136"></a> +<span class="sourceLineNo">2137</span> }<a name="line.2137"></a> +<span class="sourceLineNo">2138</span><a name="line.2138"></a> +<span class="sourceLineNo">2139</span> Path hbaseDir = FSUtils.getRootDir(getConf());<a name="line.2139"></a> +<span class="sourceLineNo">2140</span> FileSystem fs = hbaseDir.getFileSystem(getConf());<a name="line.2140"></a> +<span class="sourceLineNo">2141</span> UserProvider userProvider = UserProvider.instantiate(getConf());<a name="line.2141"></a> +<span class="sourceLineNo">2142</span> UserGroupInformation ugi = userProvider.getCurrent().getUGI();<a name="line.2142"></a> +<span class="sourceLineNo">2143</span> FileStatus[] files = fs.listStatus(hbaseDir);<a name="line.2143"></a> +<span class="sourceLineNo">2144</span> for (FileStatus file : files) {<a name="line.2144"></a> +<span class="sourceLineNo">2145</span> try {<a name="line.2145"></a> +<span class="sourceLineNo">2146</span> FSUtils.checkAccess(ugi, file, FsAction.WRITE);<a name="line.2146"></a> +<span class="sourceLineNo">2147</span> } catch (AccessDeniedException ace) {<a name="line.2147"></a> +<span class="sourceLineNo">2148</span> LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);<a name="line.2148"></a> +<span class="sourceLineNo">2149</span> errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()<a name="line.2149"></a> +<span class="sourceLineNo">2150</span> + " does not have write perms to " + file.getPath()<a name="line.2150"></a> +<span class="sourceLineNo">2151</span> + ". Please rerun hbck as hdfs user " + file.getOwner());<a name="line.2151"></a> +<span class="sourceLineNo">2152</span> throw ace;<a name="line.2152"></a> +<span class="sourceLineNo">2153</span> }<a name="line.2153"></a> +<span class="sourceLineNo">2154</span> }<a name="line.2154"></a> +<span class="sourceLineNo">2155</span> }<a name="line.2155"></a> +<span class="sourceLineNo">2156</span><a name="line.2156"></a> +<span class="sourceLineNo">2157</span> /**<a name="line.2157"></a> +<span class="sourceLineNo">2158</span> * Deletes region from meta table<a name="line.2158"></a> +<span class="sourceLineNo">2159</span> */<a name="line.2159"></a> +<span class="sourceLineNo">2160</span> private void deleteMetaRegion(HbckInfo hi) throws IOException {<a name="line.2160"></a> +<span class="sourceLineNo">2161</span> deleteMetaRegion(hi.metaEntry.getRegionName());<a name="line.2161"></a> +<span class="sourceLineNo">2162</span> }<a name="line.2162"></a> +<span class="sourceLineNo">2163</span><a name="line.2163"></a> +<span class="sourceLineNo">2164</span> /**<a name="line.2164"></a> +<span class="sourceLineNo">2165</span> * Deletes region from meta table<a name="line.2165"></a> +<span class="sourceLineNo">2166</span> */<a name="line.2166"></a> +<span class="sourceLineNo">2167</span> private void deleteMetaRegion(byte[] metaKey) throws IOException {<a name="line.2167"></a> +<span class="sourceLineNo">2168</span> Delete d = new Delete(metaKey);<a name="line.2168"></a> +<span class="sourceLineNo">2169</span> meta.delete(d);<a name="line.2169"></a> +<span class="sourceLineNo">2170</span> LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );<a name="line.2170"></a> +<span class="sourceLineNo">2171</span> }<a name="line.2171"></a> +<span class="sourceLineNo">2172</span><a name="line.2172"></a> +<span class="sourceLineNo">2173</span> /**<a name="line.2173"></a> +<span class="sourceLineNo">2174</span> * Reset the split parent region info in meta table<a name="line.2174"></a> +<span class="sourceLineNo">2175</span> */<a name="line.2175"></a> +<span class="sourceLineNo">2176</span> private void resetSplitParent(HbckInfo hi) throws IOException {<a name="line.2176"></a> +<span class="sourceLineNo">2177</span> RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());<a name="line.2177"></a> +<span class="sourceLineNo">2178</span> Delete d = new Delete(hi.metaEntry.getRegionName());<a name="line.2178"></a> +<span class="sourceLineNo">2179</span> d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);<a name="line.2179"></a> +<span class="sourceLineNo">2180</span> d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);<a name="line.2180"></a> +<span class="sourceLineNo">2181</span> mutations.add(d);<a name="line.2181"></a> +<span class="sourceLineNo">2182</span><a name="line.2182"></a> +<span class="sourceLineNo">2183</span> RegionInfo hri = RegionInfoBuilder.newBuilder(hi.metaEntry)<a name="line.2183"></a> +<span class="sourceLineNo">2184</span> .setOffline(false)<a name="line.2184"></a> +<span class="sourceLineNo">2185</span> .setSplit(false)<a name="line.2185"></a> +<span class="sourceLineNo">2186</span> .build();<a name="line.2186"></a> +<span class="sourceLineNo">2187</span> Put p = MetaTableAccessor.makePutFromRegionInfo(hri, EnvironmentEdgeManager.currentTime());<a name="line.2187"></a> +<span class="sourceLineNo">2188</span> mutations.add(p);<a name="line.2188"></a> +<span class="sourceLineNo">2189</span><a name="line.2189"></a> +<span class="sourceLineNo">2190</span> meta.mutateRow(mutations);<a name="line.2190"></a> +<span class="sourceLineNo">2191</span> LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );<a name="line.2191"></a> +<span class="sourceLineNo">2192</span> }<a name="line.2192"></a> +<span class="sourceLineNo">2193</span><a name="line.2193"></a> +<span class="sourceLineNo">2194</span> /**<a name="line.2194"></a> +<span class="sourceLineNo">2195</span> * This backwards-compatibility wrapper for permanently offlining a region<a name="line.2195"></a> +<span class="sourceLineNo">2196</span> * that should not be alive. If the region server does not support the<a name="line.2196"></a> +<span class="sourceLineNo">2197</span> * "offline" method, it will use the closest unassign method instead. This<a name="line.2197"></a> +<span class="sourceLineNo">2198</span> * will basically work until one attempts to disable or delete the affected<a name="line.2198"></a> +<span class="sourceLineNo">2199</span> * table. The problem has to do with in-memory only master state, so<a name="line.2199"></a> +<span class="sourceLineNo">2200</span> * restarting the HMaster or failing over to another should fix this.<a name="line.2200"></a> +<span class="sourceLineNo">2201</span> */<a name="line.2201"></a> +<span class="sourceLineNo">2202</span> private void offline(byte[] regionName) throws IOException {<a name="line.2202"></a> +<span class="sourceLineNo">2203</span> String regionString = Bytes.toStringBinary(regionName);<a name="line.2203"></a> +<span class="sourceLineNo">2204</span> if (!rsSupportsOffline) {<a name="line.2204"></a> +<span class="sourceLineNo">2205</span> LOG.warn("Using unassign region " + regionString<a name="line.2205"></a> +<span class="sourceLineNo">2206</span> + " instead of using offline method, you should"<a name="line.2206"></a> +<span class="sourceLineNo">2207</span> + " restart HMaster after these repairs");<a name="line.2207"></a> +<span class="sourceLineNo">2208</span> admin.unassign(regionName, true);<a name="line.2208"></a> +<span class="sourceLineNo">2209</span> return;<a name="line.2209"></a> +<span class="sourceLineNo">2210</span> }<a name="line.2210"></a> +<span class="sourceLineNo">2211</span><a name="line.2211"></a> +<span class="sourceLineNo">2212</span> // first time we assume the rs's supports #offline.<a name="line.2212"></a> +<span class="sourceLineNo">2213</span> try {<a name="line.2213"></a> +<span class="sourceLineNo">2214</span> LOG.info("Offlining region " + regionString);<a name="line.2214"></a> +<span class="sourceLineNo">2215</span> admin.offline(regionName);<a name="line.2215"></a> +<span class="sourceLineNo">2216</span> } catch (IOException ioe) {<a name="line.2216"></a> +<span class="sourceLineNo">2217</span> String notFoundMsg = "java.lang.NoSuchMethodException: " +<a name="line.2217"></a> +<span class="sourceLineNo">2218</span> "org.apache.hadoop.hbase.master.HMaster.offline([B)";<a name="line.2218"></a> +<span class="sourceLineNo">2219</span> if (ioe.getMessage().contains(notFoundMsg)) {<a name="line.2219"></a> +<span class="sourceLineNo">2220</span> LOG.warn("Using unassign region " + regionString<a name="line.2220"></a> +<span class="sourceLineNo">2221</span> + " instead of using offline method, you should"<a name="line.2221"></a> +<span class="sourceLineNo">2222</span> + " restart HMaster after these repairs");<a name="line.2222"></a> +<span class="sourceLineNo">2223</span> rsSupportsOffline = false; // in the future just use unassign<a name="line.2223"></a> +<span class="sourceLineNo">2224</span> admin.unassign(regionName, true);<a name="line.2224"></a> +<span class="sourceLineNo">2225</span> return;<a name="line.2225"></a> +<span class="sourceLineNo">2226</span> }<a name="line.2226"></a> +<span class="sourceLineNo">2227</span> throw ioe;<a name="line.2227"></a> +<span class="sourceLineNo">2228</span> }<a name="line.2228"></a> +<span class="sourceLineNo">2229</span> }<a name="line.2229"></a> +<span class="sourceLineNo">2230</span><a name="line.2230"></a> +<span class="sourceLineNo">2231</span> private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {<a name="line.2231"></a> +<span class="sourceLineNo">2232</span> undeployRegionsForHbi(hi);<a name="line.2232"></a> +<span class="sourceLineNo">2233</span> // undeploy replicas of the region (but only if the method is invoked for the primary)<a name="line.2233"></a> +<span class="sourceLineNo">2234</span> if (hi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {<a name="line.2234"></a> +<span class="sourceLineNo">2235</span> return;<a name="line.2235"></a> +<span class="sourceLineNo">2236</span> }<a name="line.2236"></a> +<span class="sourceLineNo">2237</span> int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();<a name="line.2237"></a> +<span class="sourceLineNo">2238</span> for (int i = 1; i < numReplicas; i++) {<a name="line.2238"></a> +<span class="sourceLineNo">2239</span> if (hi.getPrimaryHRIForDeployedReplica() == null) continue;<a name="line.2239"></a> +<span class="sourceLineNo">2240</span> RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(<a name="line.2240"></a> +<span class="sourceLineNo">2241</span> hi.getPrimaryHRIForDeployedReplica(), i);<a name="line.2241"></a> +<span class="sourceLineNo">2242</span> HbckInfo h = regionInfoMap.get(hri.getEncodedName());<a name="line.2242"></a> +<span class="sourceLineNo">2243</span> if (h != null) {<a name="line.2243"></a> +<span class="sourceLineNo">2244</span> undeployRegionsForHbi(h);<a name="line.2244"></a> +<span class="sourceLineNo">2245</span> //set skip checks; we undeployed it, and we don't want to evaluate this anymore<a name="line.2245"></a> +<span class="sourceLineNo">2246</span> //in consistency checks<a name="line.2246"></a> +<span class="sourceLineNo">2247</span> h.setSkipChecks(true);<a name="line.2247"></a> +<span class="sourceLineNo">2248</span> }<a name="line.2248"></a> +<span class="sourceLineNo">2249</span> }<a name="line.2249"></a> +<span class="sourceLineNo">2250</span> }<a name="line.2250"></a> +<span class="sourceLineNo">2251</span><a name="line.2251"></a> +<span class="sourceLineNo">2252</span> private void undeployRegionsForHbi(HbckInfo hi) throws IOException, InterruptedException {<a name="line.2252"></a> +<span class="sourceLineNo">2253</span> for (OnlineEntry rse : hi.deployedEntries) {<a name="line.2253"></a> +<span class="sourceLineNo">2254</span> LOG.debug("Undeploy region " + rse.hri + " from " + rse.hsa);<a name="line.2254"></a> +<span class="sourceLineNo">2255</span> try {<a name="line.2255"></a> +<span class="sourceLineNo">2256</span> HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.hsa, rse.hri);<a name="line.2256"></a> +<span class="sourceLineNo">2257</span> offline(rse.hri.getRegionName());<a name="line.2257"></a> +<span class="sourceLineNo">2258</span> } catch (IOException ioe) {<a name="line.2258"></a> +<span class="sourceLineNo">2259</span> LOG.warn("Got exception when attempting to offline region "<a name="line.2259"></a> +<span class="sourceLineNo">2260</span> + Bytes.toString(rse.hri.getRegionName()), ioe);<a name="line.2260"></a> +<span class="sourceLineNo">2261</span> }<a name="line.2261"></a> +<span class="sourceLineNo">2262</span> }<a name="line.2262"></a> +<span class="sourceLineNo">2263</span> }<a name="line.2263"></a> +<span class="sourceLineNo">2264</span><a name="line.2264"></a> +<span class="sourceLineNo">2265</span> /**<a name="line.2265"></a> +<span class="sourceLineNo">2266</span> * Attempts to undeploy a region from a region server based in information in<a name="line.2266"></a> +<span class="sourceLineNo">2267</span> * META. Any operations that modify the file system should make sure that<a name="line.2267"></a> +<span class="sourceLineNo">2268</span> * its corresponding region is not deployed to prevent data races.<a name="line.2268"></a> +<span class="sourceLineNo">2269</span> *<a name="line.2269"></a> +<span class="sourceLineNo">2270</span> * A separate call is required to update the master in-memory region state<a name="line.2270"></a> +<span class="sourceLineNo">2271</span> * kept in the AssignementManager. Because disable uses this state instead of<a name="line.2271"></a> +<span class="sourceLineNo">2272</span> * that found in META, we can't seem to cleanly disable/delete tables that<a name="line.2272"></a> +<span class="sourceLineNo">2273</span> * have been hbck fixed. When used on a version of HBase that does not have<a name="line.2273"></a> +<span class="sourceLineNo">2274</span> * the offline ipc call exposed on the master (&lt;0.90.5, &lt;0.92.0) a master<a name="line.2274"></a> +<span class="sourceLineNo">2275</span> * restart or failover may be required.<a name="line.2275"></a> +<span class="sourceLineNo">2276</span> */<a name="line.2276"></a> +<span class="sourceLineNo">2277</span> private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {<a name="line.2277"></a> +<span class="sourceLineNo">2278</span> if (hi.metaEntry == null && hi.hdfsEntry == null) {<a name="line.2278"></a> +<span class="sourceLineNo">2279</span> undeployRegions(hi);<a name="line.2279"></a> +<span class="sourceLineNo">2280</span> return;<a name="line.2280"></a> +<span class="sourceLineNo">2281</span> }<a name="line.2281"></a> +<span class="sourceLineNo">2282</span><a name="line.2282"></a> +<span class="sourceLineNo">2283</span> // get assignment info and hregioninfo from meta.<a name="line.2283"></a> +<span class="sourceLineNo">2284</span> Get get = new Get(hi.getRegionName());<a name="line.2284"></a> +<span class="sourceLineNo">2285</span> get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);<a name="line.2285"></a> +<span class="sourceLineNo">2286</span> get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);<a name="line.2286"></a> +<span class="sourceLineNo">2287</span> get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);<a name="line.2287"></a> +<span class="sourceLineNo">2288</span> // also get the locations of the replicas to close if the primary region is being closed<a name="line.2288"></a> +<span class="sourceLineNo">2289</span> if (hi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {<a name="line.2289"></a> +<span class="sourceLineNo">2290</span> int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();<a name="line.2290"></a> +<span class="sourceLineNo">2291</span> for (int i = 0; i < numReplicas; i++) {<a name="line.2291"></a> +<span class="sourceLineNo">2292</span> get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));<a name="line.2292"></a> +<span class="sourceLineNo">2293</span> get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));<a name="line.2293"></a> +<span class="sourceLineNo">2294</span> }<a name="line.2294"></a> +<span class="sourceLineNo">2295</span> }<a name="line.2295"></a> +<span class="sourceLineNo">2296</span> Result r = meta.get(get);<a name="line.2296"></a> +<span class="sourceLineNo">2297</span> RegionLocations rl = MetaTableAccessor.getRegionLocations(r);<a name="line.2297"></a> +<span class="sourceLineNo">2298</span> if (rl == null) {<a name="line.2298"></a> +<span class="sourceLineNo">2299</span> LOG.warn("Unable to close region " + hi.getRegionNameAsString() +<a name="line.2299"></a> +<span class="sourceLineNo">2300</span> " since meta does not have handle to reach it");<a name="line.2300"></a> +<span class="sourceLineNo">2301</span> return;<a name="line.2301"></a> +<span class="sourceLineNo">2302</span> }<a name="line.2302"></a> +<span class="sourceLineNo">2303</span> for (HRegionLocation h : rl.getRegionLocations()) {<a name="line.2303"></a> +<span class="sourceLineNo">2304</span> ServerName serverName = h.getServerName();<a name="line.2304"></a> +<span class="sourceLineNo">2305</span> if (serverName == null) {<a name="line.2305"></a> +<span class="sourceLineNo">2306</span> errors.reportError("Unable to close region "<a name="line.2306"></a> +<span class="sourceLineNo">2307</span> + hi.getRegionNameAsString() + " because meta does not "<a name="line.2307"></a> +<span class="sourceLineNo">2308</span> + "have handle to reach it.");<a name="line.2308"></a> +<span class="sourceLineNo">2309</span> continue;<a name="line.2309"></a> +<span class="sourceLineNo">2310</span> }<a name="line.2310"></a> +<span class="sourceLineNo">2311</span> RegionInfo hri = h.getRegionInfo();<a name="line.2311"></a> +<span class="sourceLineNo">2312</span> if (hri == null) {<a name="line.2312"></a> +<span class="sourceLineNo">2313</span> LOG.warn("Unable to close region " + hi.getRegionNameAsString()<a name="line.2313"></a> +<span class="sourceLineNo">2314</span> + " because hbase:meta had invalid or missing "<a name="line.2314"></a> +<span class="sourceLineNo">2315</span> + HConstants.CATALOG_FAMILY_STR + ":"<a name="line.2315"></a> +<span class="sourceLineNo">2316</span> + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)<a name="line.2316"></a> +<span class="sourceLineNo">2317</span> + " qualifier value.");<a name="line.2317"></a> +<span class="sourceLineNo">2318</span> continue;<a name="line.2318"></a> +<span class="sourceLineNo">2319</span> }<a name="line.2319"></a> +<span class="sourceLineNo">2320</span> // close the region -- close files and remove assignment<a name="line.2320"></a> +<span class="sourceLineNo">2321</span> HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);<a name="line.2321"></a> +<span class="sourceLineNo">2322</span> }<a name="line.2322"></a> +<span class="sourceLineNo">2323</span> }<a name="line.2323"></a> +<span class="sourceLineNo">2324</span><a name="line.2324"></a> +<span class="sourceLineNo">2325</span> private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,<a name="line.2325"></a> +<span class="sourceLineNo">2326</span> KeeperException, InterruptedException {<a name="line.2326"></a> +<span class="sourceLineNo">2327</span> // If we are trying to fix the errors<a name="line.2327"></a> +<span class="sourceLineNo">2328</span> if (shouldFixAssignments()) {<a name="line.2328"></a> +<span class="sourceLineNo">2329</span> errors.print(msg);<a name="line.2329"></a> +<span class="sourceLineNo">2330</span> undeployRegions(hbi);<a name="line.2330"></a> +<span class="sourceLineNo">2331</span> setShouldRerun();<a name="line.2331"></a> +<span class="sourceLineNo">2332</span> RegionInfo hri = hbi.getHdfsHRI();<a name="line.2332"></a> +<span class="sourceLineNo">2333</span> if (hri == null) {<a name="line.2333"></a> +<span class="sourceLineNo">2334</span> hri = hbi.metaEntry;<a name="line.2334"></a> +<span class="sourceLineNo">2335</span> }<a name="line.2335"></a> +<span class="sourceLineNo">2336</span> HBaseFsckRepair.fixUnassigned(admin, hri);<a name="line.2336"></a> +<span class="sourceLineNo">2337</span> HBaseFsckRepair.waitUntilAssigned(admin, hri);<a name="line.2337"></a> +<span class="sourceLineNo">2338</span><a name="line.2338"></a> +<span class="sourceLineNo">2339</span> // also assign replicas if needed (do it only when this call operates on a primary replica)<a name="line.2339"></a> +<span class="sourceLineNo">2340</span> if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) return;<a name="line.2340"></a> +<span class="sourceLineNo">2341</span> int replicationCount = admin.getTableDescriptor(hri.getTable()).getRegionReplication();<a name="line.2341"></a> +<span class="sourceLineNo">2342</span> for (int i = 1; i < replicationCount; i++) {<a name="line.2342"></a> +<span class="sourceLineNo">2343</span> hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);<a name="line.2343"></a> +<span class="sourceLineNo">2344</span> HbckInfo h = regionInfoMap.get(hri.getEncodedName());<a name="line.2344"></a> +<span class="sourceLineNo">2345</span> if (h != null) {<a name="line.2345"></a> +<span class="sourceLineNo">2346</span> undeployRegions(h);<a name="line.2346"></a> +<span class="sourceLineNo">2347</span> //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore<a name="line.2347"></a> +<span class="sourceLineNo">2348</span> //in consistency checks<a name="line.2348"></a> +<span class="sourceLineNo">2349</span> h.setSkipChecks(true);<a name="line.2349"></a> +<span class="sourceLineNo">2350</span> }<a name="line.2350"></a> +<span class="sourceLineNo">2351</span> HBaseFsckRepair.fixUnassigned(admin, hri);<a name="line.2351"></a> +<span class="sourceLineNo">2352</span> HBaseFsckRepair.waitUntilAssigned(admin, hri);<a name="line.2352"></a> +<span class="sourceLineNo">2353</span> }<a name="line.2353"></a> +<span class="sourceLineNo">2354</span><a name="line.2354"></a> +<span class="sourceLineNo">2355</span> }<a name="line.2355"></a> +<span class="sourceLineNo">2356</span> }<a name="line.2356"></a> +<span class="sourceLineNo">2357</span><a name="line.2357"></a> +<span class="sourceLineNo">2358</span> /**<a name="line.2358"></a> +<span class="sourceLineNo">2359</span> * Check a single region for consistency and correct deployment.<a name="line.2359"></a> +<span class="sourceLineNo">2360</span> */<a name="line.2360"></a> +<span class="sourceLineNo">2361</span> private void checkRegionConsistency(final String key, final HbckInfo hbi)<a name="line.2361"></a> +<span class="sourceLineNo">2362</span> throws IOException, KeeperException, InterruptedException {<a name="line.2362"></a> +<span class="sourceLineNo">2363</span><a name="line.2363"></a> +<span class="sourceLineNo">2364</span> if (hbi.isSkipChecks()) return;<a name="line.2364"></a> +<span class="sourceLineNo">2365</span> String descriptiveName = hbi.toString();<a name="line.2365"></a> +<span class="sourceLineNo">2366</span> boolean inMeta = hbi.metaEntry != null;<a name="line.2366"></a> +<span class="sourceLineNo">2367</span> // In case not checking HDFS, assume the region is on HDFS<a name="line.2367"></a> +<span class="sourceLineNo">2368</span> boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;<a name="line.2368"></a> +<span class="sourceLineNo">2369</span> boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;<a name="line.2369"></a> +<span class="sourceLineNo">2370</span> boolean isDeployed = !hbi.deployedOn.isEmpty();<a name="line.2370"></a> +<span class="sourceLineNo">2371</span> boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;<a name="line.2371"></a> +<span class="sourceLineNo">2372</span> boolean deploymentMatchesMeta =<a name="line.2372"></a> +<span class="sourceLineNo">2373</span> hasMetaAssignment && isDeployed && !isMultiplyDeployed &&<a name="line.2373"></a> +<span class="sourceLineNo">2374</span> hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));<a name="line.2374"></a> +<span class="sourceLineNo">2375</span> boolean splitParent =<a name="line.2375"></a> +<span class="sourceLineNo">2376</span> inMeta && hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();<a name="line.2376"></a> +<span class="sourceLineNo">2377</span> boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry.getTable());<a name="line.2377"></a> +<span class="sourceLineNo">2378</span> boolean recentlyModified = inHdfs &&<a name="line.2378"></a> +<span class="sourceLineNo">2379</span> hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();<a name="line.2379"></a> +<span class="sourceLineNo">2380</span><a name="line.2380"></a> +<span class="sourceLineNo">2381</span> // ========== First the healthy cases =============<a name="line.2381"></a> +<span class="sourceLineNo">2382</span> if (hbi.containsOnlyHdfsEdits()) {<a name="line.2382"></a> +<span class="sourceLineNo">2383</span> return;<a name="line.2383"></a> +<span class="sourceLineNo">2384</span> }<a name="line.2384"></a> +<span class="sourceLineNo">2385</span> if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {<a name="line.2385"></a> +<span class="sourceLineNo">2386</span> return;<a name="line.2386"></a> +<span class="sourceLineNo">2387</span> } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {<a name="line.2387"></a> +<span class="sourceLineNo">2388</span> LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +<a name="line.2388"></a> +<span class="sourceLineNo">2389</span> "tabled that is not deployed");<a name="line.2389"></a> +<span class="sourceLineNo">2390</span> return;<a name="line.2390"></a> +<span class="sourceLineNo">2391</span> } else if (recentlyModified) {<a name="line.2391"></a> +<span clas
<TRUNCATED>