http://git-wip-us.apache.org/repos/asf/hbase-site/blob/04d647a7/devapidocs/src-html/org/apache/hadoop/hbase/util/HBaseFsck.OnlineEntry.html ---------------------------------------------------------------------- diff --git a/devapidocs/src-html/org/apache/hadoop/hbase/util/HBaseFsck.OnlineEntry.html b/devapidocs/src-html/org/apache/hadoop/hbase/util/HBaseFsck.OnlineEntry.html index bf1a2cc..89317aa 100644 --- a/devapidocs/src-html/org/apache/hadoop/hbase/util/HBaseFsck.OnlineEntry.html +++ b/devapidocs/src-html/org/apache/hadoop/hbase/util/HBaseFsck.OnlineEntry.html @@ -1813,3325 +1813,3330 @@ <span class="sourceLineNo">1805</span> private void loadTableStates()<a name="line.1805"></a> <span class="sourceLineNo">1806</span> throws IOException {<a name="line.1806"></a> <span class="sourceLineNo">1807</span> tableStates = MetaTableAccessor.getTableStates(connection);<a name="line.1807"></a> -<span class="sourceLineNo">1808</span> }<a name="line.1808"></a> -<span class="sourceLineNo">1809</span><a name="line.1809"></a> -<span class="sourceLineNo">1810</span> /**<a name="line.1810"></a> -<span class="sourceLineNo">1811</span> * Check if the specified region's table is disabled.<a name="line.1811"></a> -<span class="sourceLineNo">1812</span> * @param tableName table to check status of<a name="line.1812"></a> -<span class="sourceLineNo">1813</span> */<a name="line.1813"></a> -<span class="sourceLineNo">1814</span> private boolean isTableDisabled(TableName tableName) {<a name="line.1814"></a> -<span class="sourceLineNo">1815</span> return tableStates.containsKey(tableName)<a name="line.1815"></a> -<span class="sourceLineNo">1816</span> && tableStates.get(tableName)<a name="line.1816"></a> -<span class="sourceLineNo">1817</span> .inStates(TableState.State.DISABLED, TableState.State.DISABLING);<a name="line.1817"></a> -<span class="sourceLineNo">1818</span> }<a name="line.1818"></a> -<span class="sourceLineNo">1819</span><a name="line.1819"></a> -<span class="sourceLineNo">1820</span> /**<a name="line.1820"></a> -<span class="sourceLineNo">1821</span> * Scan HDFS for all regions, recording their information into<a name="line.1821"></a> -<span class="sourceLineNo">1822</span> * regionInfoMap<a name="line.1822"></a> -<span class="sourceLineNo">1823</span> */<a name="line.1823"></a> -<span class="sourceLineNo">1824</span> public void loadHdfsRegionDirs() throws IOException, InterruptedException {<a name="line.1824"></a> -<span class="sourceLineNo">1825</span> Path rootDir = FSUtils.getRootDir(getConf());<a name="line.1825"></a> -<span class="sourceLineNo">1826</span> FileSystem fs = rootDir.getFileSystem(getConf());<a name="line.1826"></a> -<span class="sourceLineNo">1827</span><a name="line.1827"></a> -<span class="sourceLineNo">1828</span> // list all tables from HDFS<a name="line.1828"></a> -<span class="sourceLineNo">1829</span> List<FileStatus> tableDirs = Lists.newArrayList();<a name="line.1829"></a> -<span class="sourceLineNo">1830</span><a name="line.1830"></a> -<span class="sourceLineNo">1831</span> boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));<a name="line.1831"></a> +<span class="sourceLineNo">1808</span> // Add hbase:meta so this tool keeps working. In hbase2, meta is always enabled though it<a name="line.1808"></a> +<span class="sourceLineNo">1809</span> // has no entry in the table states. HBCK doesn't work right w/ hbase2 but just do this in<a name="line.1809"></a> +<span class="sourceLineNo">1810</span> // meantime.<a name="line.1810"></a> +<span class="sourceLineNo">1811</span> this.tableStates.put(TableName.META_TABLE_NAME,<a name="line.1811"></a> +<span class="sourceLineNo">1812</span> new TableState(TableName.META_TABLE_NAME, TableState.State.ENABLED));<a name="line.1812"></a> +<span class="sourceLineNo">1813</span> }<a name="line.1813"></a> +<span class="sourceLineNo">1814</span><a name="line.1814"></a> +<span class="sourceLineNo">1815</span> /**<a name="line.1815"></a> +<span class="sourceLineNo">1816</span> * Check if the specified region's table is disabled.<a name="line.1816"></a> +<span class="sourceLineNo">1817</span> * @param tableName table to check status of<a name="line.1817"></a> +<span class="sourceLineNo">1818</span> */<a name="line.1818"></a> +<span class="sourceLineNo">1819</span> private boolean isTableDisabled(TableName tableName) {<a name="line.1819"></a> +<span class="sourceLineNo">1820</span> return tableStates.containsKey(tableName)<a name="line.1820"></a> +<span class="sourceLineNo">1821</span> && tableStates.get(tableName)<a name="line.1821"></a> +<span class="sourceLineNo">1822</span> .inStates(TableState.State.DISABLED, TableState.State.DISABLING);<a name="line.1822"></a> +<span class="sourceLineNo">1823</span> }<a name="line.1823"></a> +<span class="sourceLineNo">1824</span><a name="line.1824"></a> +<span class="sourceLineNo">1825</span> /**<a name="line.1825"></a> +<span class="sourceLineNo">1826</span> * Scan HDFS for all regions, recording their information into<a name="line.1826"></a> +<span class="sourceLineNo">1827</span> * regionInfoMap<a name="line.1827"></a> +<span class="sourceLineNo">1828</span> */<a name="line.1828"></a> +<span class="sourceLineNo">1829</span> public void loadHdfsRegionDirs() throws IOException, InterruptedException {<a name="line.1829"></a> +<span class="sourceLineNo">1830</span> Path rootDir = FSUtils.getRootDir(getConf());<a name="line.1830"></a> +<span class="sourceLineNo">1831</span> FileSystem fs = rootDir.getFileSystem(getConf());<a name="line.1831"></a> <span class="sourceLineNo">1832</span><a name="line.1832"></a> -<span class="sourceLineNo">1833</span> List<Path> paths = FSUtils.getTableDirs(fs, rootDir);<a name="line.1833"></a> -<span class="sourceLineNo">1834</span> for (Path path : paths) {<a name="line.1834"></a> -<span class="sourceLineNo">1835</span> TableName tableName = FSUtils.getTableName(path);<a name="line.1835"></a> -<span class="sourceLineNo">1836</span> if ((!checkMetaOnly &&<a name="line.1836"></a> -<span class="sourceLineNo">1837</span> isTableIncluded(tableName)) ||<a name="line.1837"></a> -<span class="sourceLineNo">1838</span> tableName.equals(TableName.META_TABLE_NAME)) {<a name="line.1838"></a> -<span class="sourceLineNo">1839</span> tableDirs.add(fs.getFileStatus(path));<a name="line.1839"></a> -<span class="sourceLineNo">1840</span> }<a name="line.1840"></a> -<span class="sourceLineNo">1841</span> }<a name="line.1841"></a> -<span class="sourceLineNo">1842</span><a name="line.1842"></a> -<span class="sourceLineNo">1843</span> // verify that version file exists<a name="line.1843"></a> -<span class="sourceLineNo">1844</span> if (!foundVersionFile) {<a name="line.1844"></a> -<span class="sourceLineNo">1845</span> errors.reportError(ERROR_CODE.NO_VERSION_FILE,<a name="line.1845"></a> -<span class="sourceLineNo">1846</span> "Version file does not exist in root dir " + rootDir);<a name="line.1846"></a> -<span class="sourceLineNo">1847</span> if (shouldFixVersionFile()) {<a name="line.1847"></a> -<span class="sourceLineNo">1848</span> LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME<a name="line.1848"></a> -<span class="sourceLineNo">1849</span> + " file.");<a name="line.1849"></a> -<span class="sourceLineNo">1850</span> setShouldRerun();<a name="line.1850"></a> -<span class="sourceLineNo">1851</span> FSUtils.setVersion(fs, rootDir, getConf().getInt(<a name="line.1851"></a> -<span class="sourceLineNo">1852</span> HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(<a name="line.1852"></a> -<span class="sourceLineNo">1853</span> HConstants.VERSION_FILE_WRITE_ATTEMPTS,<a name="line.1853"></a> -<span class="sourceLineNo">1854</span> HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));<a name="line.1854"></a> -<span class="sourceLineNo">1855</span> }<a name="line.1855"></a> -<span class="sourceLineNo">1856</span> }<a name="line.1856"></a> -<span class="sourceLineNo">1857</span><a name="line.1857"></a> -<span class="sourceLineNo">1858</span> // Avoid multithreading at table-level because already multithreaded internally at<a name="line.1858"></a> -<span class="sourceLineNo">1859</span> // region-level. Additionally multithreading at table-level can lead to deadlock<a name="line.1859"></a> -<span class="sourceLineNo">1860</span> // if there are many tables in the cluster. Since there are a limited # of threads<a name="line.1860"></a> -<span class="sourceLineNo">1861</span> // in the executor's thread pool and if we multithread at the table-level by putting<a name="line.1861"></a> -<span class="sourceLineNo">1862</span> // WorkItemHdfsDir callables into the executor, then we will have some threads in the<a name="line.1862"></a> -<span class="sourceLineNo">1863</span> // executor tied up solely in waiting for the tables' region-level calls to complete.<a name="line.1863"></a> -<span class="sourceLineNo">1864</span> // If there are enough tables then there will be no actual threads in the pool left<a name="line.1864"></a> -<span class="sourceLineNo">1865</span> // for the region-level callables to be serviced.<a name="line.1865"></a> -<span class="sourceLineNo">1866</span> for (FileStatus tableDir : tableDirs) {<a name="line.1866"></a> -<span class="sourceLineNo">1867</span> LOG.debug("Loading region dirs from " +tableDir.getPath());<a name="line.1867"></a> -<span class="sourceLineNo">1868</span> WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDir);<a name="line.1868"></a> -<span class="sourceLineNo">1869</span> try {<a name="line.1869"></a> -<span class="sourceLineNo">1870</span> item.call();<a name="line.1870"></a> -<span class="sourceLineNo">1871</span> } catch (ExecutionException e) {<a name="line.1871"></a> -<span class="sourceLineNo">1872</span> LOG.warn("Could not completely load table dir " +<a name="line.1872"></a> -<span class="sourceLineNo">1873</span> tableDir.getPath(), e.getCause());<a name="line.1873"></a> -<span class="sourceLineNo">1874</span> }<a name="line.1874"></a> -<span class="sourceLineNo">1875</span> }<a name="line.1875"></a> -<span class="sourceLineNo">1876</span> errors.print("");<a name="line.1876"></a> -<span class="sourceLineNo">1877</span> }<a name="line.1877"></a> -<span class="sourceLineNo">1878</span><a name="line.1878"></a> -<span class="sourceLineNo">1879</span> /**<a name="line.1879"></a> -<span class="sourceLineNo">1880</span> * Record the location of the hbase:meta region as found in ZooKeeper.<a name="line.1880"></a> -<span class="sourceLineNo">1881</span> */<a name="line.1881"></a> -<span class="sourceLineNo">1882</span> private boolean recordMetaRegion() throws IOException {<a name="line.1882"></a> -<span class="sourceLineNo">1883</span> RegionLocations rl = connection.locateRegion(TableName.META_TABLE_NAME,<a name="line.1883"></a> -<span class="sourceLineNo">1884</span> HConstants.EMPTY_START_ROW, false, false);<a name="line.1884"></a> -<span class="sourceLineNo">1885</span> if (rl == null) {<a name="line.1885"></a> -<span class="sourceLineNo">1886</span> errors.reportError(ERROR_CODE.NULL_META_REGION,<a name="line.1886"></a> -<span class="sourceLineNo">1887</span> "META region was not found in ZooKeeper");<a name="line.1887"></a> -<span class="sourceLineNo">1888</span> return false;<a name="line.1888"></a> -<span class="sourceLineNo">1889</span> }<a name="line.1889"></a> -<span class="sourceLineNo">1890</span> for (HRegionLocation metaLocation : rl.getRegionLocations()) {<a name="line.1890"></a> -<span class="sourceLineNo">1891</span> // Check if Meta region is valid and existing<a name="line.1891"></a> -<span class="sourceLineNo">1892</span> if (metaLocation == null ) {<a name="line.1892"></a> -<span class="sourceLineNo">1893</span> errors.reportError(ERROR_CODE.NULL_META_REGION,<a name="line.1893"></a> -<span class="sourceLineNo">1894</span> "META region location is null");<a name="line.1894"></a> -<span class="sourceLineNo">1895</span> return false;<a name="line.1895"></a> -<span class="sourceLineNo">1896</span> }<a name="line.1896"></a> -<span class="sourceLineNo">1897</span> if (metaLocation.getRegionInfo() == null) {<a name="line.1897"></a> +<span class="sourceLineNo">1833</span> // list all tables from HDFS<a name="line.1833"></a> +<span class="sourceLineNo">1834</span> List<FileStatus> tableDirs = Lists.newArrayList();<a name="line.1834"></a> +<span class="sourceLineNo">1835</span><a name="line.1835"></a> +<span class="sourceLineNo">1836</span> boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));<a name="line.1836"></a> +<span class="sourceLineNo">1837</span><a name="line.1837"></a> +<span class="sourceLineNo">1838</span> List<Path> paths = FSUtils.getTableDirs(fs, rootDir);<a name="line.1838"></a> +<span class="sourceLineNo">1839</span> for (Path path : paths) {<a name="line.1839"></a> +<span class="sourceLineNo">1840</span> TableName tableName = FSUtils.getTableName(path);<a name="line.1840"></a> +<span class="sourceLineNo">1841</span> if ((!checkMetaOnly &&<a name="line.1841"></a> +<span class="sourceLineNo">1842</span> isTableIncluded(tableName)) ||<a name="line.1842"></a> +<span class="sourceLineNo">1843</span> tableName.equals(TableName.META_TABLE_NAME)) {<a name="line.1843"></a> +<span class="sourceLineNo">1844</span> tableDirs.add(fs.getFileStatus(path));<a name="line.1844"></a> +<span class="sourceLineNo">1845</span> }<a name="line.1845"></a> +<span class="sourceLineNo">1846</span> }<a name="line.1846"></a> +<span class="sourceLineNo">1847</span><a name="line.1847"></a> +<span class="sourceLineNo">1848</span> // verify that version file exists<a name="line.1848"></a> +<span class="sourceLineNo">1849</span> if (!foundVersionFile) {<a name="line.1849"></a> +<span class="sourceLineNo">1850</span> errors.reportError(ERROR_CODE.NO_VERSION_FILE,<a name="line.1850"></a> +<span class="sourceLineNo">1851</span> "Version file does not exist in root dir " + rootDir);<a name="line.1851"></a> +<span class="sourceLineNo">1852</span> if (shouldFixVersionFile()) {<a name="line.1852"></a> +<span class="sourceLineNo">1853</span> LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME<a name="line.1853"></a> +<span class="sourceLineNo">1854</span> + " file.");<a name="line.1854"></a> +<span class="sourceLineNo">1855</span> setShouldRerun();<a name="line.1855"></a> +<span class="sourceLineNo">1856</span> FSUtils.setVersion(fs, rootDir, getConf().getInt(<a name="line.1856"></a> +<span class="sourceLineNo">1857</span> HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(<a name="line.1857"></a> +<span class="sourceLineNo">1858</span> HConstants.VERSION_FILE_WRITE_ATTEMPTS,<a name="line.1858"></a> +<span class="sourceLineNo">1859</span> HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));<a name="line.1859"></a> +<span class="sourceLineNo">1860</span> }<a name="line.1860"></a> +<span class="sourceLineNo">1861</span> }<a name="line.1861"></a> +<span class="sourceLineNo">1862</span><a name="line.1862"></a> +<span class="sourceLineNo">1863</span> // Avoid multithreading at table-level because already multithreaded internally at<a name="line.1863"></a> +<span class="sourceLineNo">1864</span> // region-level. Additionally multithreading at table-level can lead to deadlock<a name="line.1864"></a> +<span class="sourceLineNo">1865</span> // if there are many tables in the cluster. Since there are a limited # of threads<a name="line.1865"></a> +<span class="sourceLineNo">1866</span> // in the executor's thread pool and if we multithread at the table-level by putting<a name="line.1866"></a> +<span class="sourceLineNo">1867</span> // WorkItemHdfsDir callables into the executor, then we will have some threads in the<a name="line.1867"></a> +<span class="sourceLineNo">1868</span> // executor tied up solely in waiting for the tables' region-level calls to complete.<a name="line.1868"></a> +<span class="sourceLineNo">1869</span> // If there are enough tables then there will be no actual threads in the pool left<a name="line.1869"></a> +<span class="sourceLineNo">1870</span> // for the region-level callables to be serviced.<a name="line.1870"></a> +<span class="sourceLineNo">1871</span> for (FileStatus tableDir : tableDirs) {<a name="line.1871"></a> +<span class="sourceLineNo">1872</span> LOG.debug("Loading region dirs from " +tableDir.getPath());<a name="line.1872"></a> +<span class="sourceLineNo">1873</span> WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDir);<a name="line.1873"></a> +<span class="sourceLineNo">1874</span> try {<a name="line.1874"></a> +<span class="sourceLineNo">1875</span> item.call();<a name="line.1875"></a> +<span class="sourceLineNo">1876</span> } catch (ExecutionException e) {<a name="line.1876"></a> +<span class="sourceLineNo">1877</span> LOG.warn("Could not completely load table dir " +<a name="line.1877"></a> +<span class="sourceLineNo">1878</span> tableDir.getPath(), e.getCause());<a name="line.1878"></a> +<span class="sourceLineNo">1879</span> }<a name="line.1879"></a> +<span class="sourceLineNo">1880</span> }<a name="line.1880"></a> +<span class="sourceLineNo">1881</span> errors.print("");<a name="line.1881"></a> +<span class="sourceLineNo">1882</span> }<a name="line.1882"></a> +<span class="sourceLineNo">1883</span><a name="line.1883"></a> +<span class="sourceLineNo">1884</span> /**<a name="line.1884"></a> +<span class="sourceLineNo">1885</span> * Record the location of the hbase:meta region as found in ZooKeeper.<a name="line.1885"></a> +<span class="sourceLineNo">1886</span> */<a name="line.1886"></a> +<span class="sourceLineNo">1887</span> private boolean recordMetaRegion() throws IOException {<a name="line.1887"></a> +<span class="sourceLineNo">1888</span> RegionLocations rl = connection.locateRegion(TableName.META_TABLE_NAME,<a name="line.1888"></a> +<span class="sourceLineNo">1889</span> HConstants.EMPTY_START_ROW, false, false);<a name="line.1889"></a> +<span class="sourceLineNo">1890</span> if (rl == null) {<a name="line.1890"></a> +<span class="sourceLineNo">1891</span> errors.reportError(ERROR_CODE.NULL_META_REGION,<a name="line.1891"></a> +<span class="sourceLineNo">1892</span> "META region was not found in ZooKeeper");<a name="line.1892"></a> +<span class="sourceLineNo">1893</span> return false;<a name="line.1893"></a> +<span class="sourceLineNo">1894</span> }<a name="line.1894"></a> +<span class="sourceLineNo">1895</span> for (HRegionLocation metaLocation : rl.getRegionLocations()) {<a name="line.1895"></a> +<span class="sourceLineNo">1896</span> // Check if Meta region is valid and existing<a name="line.1896"></a> +<span class="sourceLineNo">1897</span> if (metaLocation == null ) {<a name="line.1897"></a> <span class="sourceLineNo">1898</span> errors.reportError(ERROR_CODE.NULL_META_REGION,<a name="line.1898"></a> -<span class="sourceLineNo">1899</span> "META location regionInfo is null");<a name="line.1899"></a> +<span class="sourceLineNo">1899</span> "META region location is null");<a name="line.1899"></a> <span class="sourceLineNo">1900</span> return false;<a name="line.1900"></a> <span class="sourceLineNo">1901</span> }<a name="line.1901"></a> -<span class="sourceLineNo">1902</span> if (metaLocation.getHostname() == null) {<a name="line.1902"></a> +<span class="sourceLineNo">1902</span> if (metaLocation.getRegionInfo() == null) {<a name="line.1902"></a> <span class="sourceLineNo">1903</span> errors.reportError(ERROR_CODE.NULL_META_REGION,<a name="line.1903"></a> -<span class="sourceLineNo">1904</span> "META location hostName is null");<a name="line.1904"></a> +<span class="sourceLineNo">1904</span> "META location regionInfo is null");<a name="line.1904"></a> <span class="sourceLineNo">1905</span> return false;<a name="line.1905"></a> <span class="sourceLineNo">1906</span> }<a name="line.1906"></a> -<span class="sourceLineNo">1907</span> ServerName sn = metaLocation.getServerName();<a name="line.1907"></a> -<span class="sourceLineNo">1908</span> MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, EnvironmentEdgeManager.currentTime());<a name="line.1908"></a> -<span class="sourceLineNo">1909</span> HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());<a name="line.1909"></a> -<span class="sourceLineNo">1910</span> if (hbckInfo == null) {<a name="line.1910"></a> -<span class="sourceLineNo">1911</span> regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));<a name="line.1911"></a> -<span class="sourceLineNo">1912</span> } else {<a name="line.1912"></a> -<span class="sourceLineNo">1913</span> hbckInfo.metaEntry = m;<a name="line.1913"></a> -<span class="sourceLineNo">1914</span> }<a name="line.1914"></a> -<span class="sourceLineNo">1915</span> }<a name="line.1915"></a> -<span class="sourceLineNo">1916</span> return true;<a name="line.1916"></a> -<span class="sourceLineNo">1917</span> }<a name="line.1917"></a> -<span class="sourceLineNo">1918</span><a name="line.1918"></a> -<span class="sourceLineNo">1919</span> private ZKWatcher createZooKeeperWatcher() throws IOException {<a name="line.1919"></a> -<span class="sourceLineNo">1920</span> return new ZKWatcher(getConf(), "hbase Fsck", new Abortable() {<a name="line.1920"></a> -<span class="sourceLineNo">1921</span> @Override<a name="line.1921"></a> -<span class="sourceLineNo">1922</span> public void abort(String why, Throwable e) {<a name="line.1922"></a> -<span class="sourceLineNo">1923</span> LOG.error(why, e);<a name="line.1923"></a> -<span class="sourceLineNo">1924</span> System.exit(1);<a name="line.1924"></a> -<span class="sourceLineNo">1925</span> }<a name="line.1925"></a> -<span class="sourceLineNo">1926</span><a name="line.1926"></a> -<span class="sourceLineNo">1927</span> @Override<a name="line.1927"></a> -<span class="sourceLineNo">1928</span> public boolean isAborted() {<a name="line.1928"></a> -<span class="sourceLineNo">1929</span> return false;<a name="line.1929"></a> +<span class="sourceLineNo">1907</span> if (metaLocation.getHostname() == null) {<a name="line.1907"></a> +<span class="sourceLineNo">1908</span> errors.reportError(ERROR_CODE.NULL_META_REGION,<a name="line.1908"></a> +<span class="sourceLineNo">1909</span> "META location hostName is null");<a name="line.1909"></a> +<span class="sourceLineNo">1910</span> return false;<a name="line.1910"></a> +<span class="sourceLineNo">1911</span> }<a name="line.1911"></a> +<span class="sourceLineNo">1912</span> ServerName sn = metaLocation.getServerName();<a name="line.1912"></a> +<span class="sourceLineNo">1913</span> MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, EnvironmentEdgeManager.currentTime());<a name="line.1913"></a> +<span class="sourceLineNo">1914</span> HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());<a name="line.1914"></a> +<span class="sourceLineNo">1915</span> if (hbckInfo == null) {<a name="line.1915"></a> +<span class="sourceLineNo">1916</span> regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));<a name="line.1916"></a> +<span class="sourceLineNo">1917</span> } else {<a name="line.1917"></a> +<span class="sourceLineNo">1918</span> hbckInfo.metaEntry = m;<a name="line.1918"></a> +<span class="sourceLineNo">1919</span> }<a name="line.1919"></a> +<span class="sourceLineNo">1920</span> }<a name="line.1920"></a> +<span class="sourceLineNo">1921</span> return true;<a name="line.1921"></a> +<span class="sourceLineNo">1922</span> }<a name="line.1922"></a> +<span class="sourceLineNo">1923</span><a name="line.1923"></a> +<span class="sourceLineNo">1924</span> private ZKWatcher createZooKeeperWatcher() throws IOException {<a name="line.1924"></a> +<span class="sourceLineNo">1925</span> return new ZKWatcher(getConf(), "hbase Fsck", new Abortable() {<a name="line.1925"></a> +<span class="sourceLineNo">1926</span> @Override<a name="line.1926"></a> +<span class="sourceLineNo">1927</span> public void abort(String why, Throwable e) {<a name="line.1927"></a> +<span class="sourceLineNo">1928</span> LOG.error(why, e);<a name="line.1928"></a> +<span class="sourceLineNo">1929</span> System.exit(1);<a name="line.1929"></a> <span class="sourceLineNo">1930</span> }<a name="line.1930"></a> <span class="sourceLineNo">1931</span><a name="line.1931"></a> -<span class="sourceLineNo">1932</span> });<a name="line.1932"></a> -<span class="sourceLineNo">1933</span> }<a name="line.1933"></a> -<span class="sourceLineNo">1934</span><a name="line.1934"></a> -<span class="sourceLineNo">1935</span> private ServerName getMetaRegionServerName(int replicaId)<a name="line.1935"></a> -<span class="sourceLineNo">1936</span> throws IOException, KeeperException {<a name="line.1936"></a> -<span class="sourceLineNo">1937</span> return new MetaTableLocator().getMetaRegionLocation(zkw, replicaId);<a name="line.1937"></a> +<span class="sourceLineNo">1932</span> @Override<a name="line.1932"></a> +<span class="sourceLineNo">1933</span> public boolean isAborted() {<a name="line.1933"></a> +<span class="sourceLineNo">1934</span> return false;<a name="line.1934"></a> +<span class="sourceLineNo">1935</span> }<a name="line.1935"></a> +<span class="sourceLineNo">1936</span><a name="line.1936"></a> +<span class="sourceLineNo">1937</span> });<a name="line.1937"></a> <span class="sourceLineNo">1938</span> }<a name="line.1938"></a> <span class="sourceLineNo">1939</span><a name="line.1939"></a> -<span class="sourceLineNo">1940</span> /**<a name="line.1940"></a> -<span class="sourceLineNo">1941</span> * Contacts each regionserver and fetches metadata about regions.<a name="line.1941"></a> -<span class="sourceLineNo">1942</span> * @param regionServerList - the list of region servers to connect to<a name="line.1942"></a> -<span class="sourceLineNo">1943</span> * @throws IOException if a remote or network exception occurs<a name="line.1943"></a> -<span class="sourceLineNo">1944</span> */<a name="line.1944"></a> -<span class="sourceLineNo">1945</span> void processRegionServers(Collection<ServerName> regionServerList)<a name="line.1945"></a> -<span class="sourceLineNo">1946</span> throws IOException, InterruptedException {<a name="line.1946"></a> -<span class="sourceLineNo">1947</span><a name="line.1947"></a> -<span class="sourceLineNo">1948</span> List<WorkItemRegion> workItems = new ArrayList<>(regionServerList.size());<a name="line.1948"></a> -<span class="sourceLineNo">1949</span> List<Future<Void>> workFutures;<a name="line.1949"></a> -<span class="sourceLineNo">1950</span><a name="line.1950"></a> -<span class="sourceLineNo">1951</span> // loop to contact each region server in parallel<a name="line.1951"></a> -<span class="sourceLineNo">1952</span> for (ServerName rsinfo: regionServerList) {<a name="line.1952"></a> -<span class="sourceLineNo">1953</span> workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));<a name="line.1953"></a> -<span class="sourceLineNo">1954</span> }<a name="line.1954"></a> +<span class="sourceLineNo">1940</span> private ServerName getMetaRegionServerName(int replicaId)<a name="line.1940"></a> +<span class="sourceLineNo">1941</span> throws IOException, KeeperException {<a name="line.1941"></a> +<span class="sourceLineNo">1942</span> return new MetaTableLocator().getMetaRegionLocation(zkw, replicaId);<a name="line.1942"></a> +<span class="sourceLineNo">1943</span> }<a name="line.1943"></a> +<span class="sourceLineNo">1944</span><a name="line.1944"></a> +<span class="sourceLineNo">1945</span> /**<a name="line.1945"></a> +<span class="sourceLineNo">1946</span> * Contacts each regionserver and fetches metadata about regions.<a name="line.1946"></a> +<span class="sourceLineNo">1947</span> * @param regionServerList - the list of region servers to connect to<a name="line.1947"></a> +<span class="sourceLineNo">1948</span> * @throws IOException if a remote or network exception occurs<a name="line.1948"></a> +<span class="sourceLineNo">1949</span> */<a name="line.1949"></a> +<span class="sourceLineNo">1950</span> void processRegionServers(Collection<ServerName> regionServerList)<a name="line.1950"></a> +<span class="sourceLineNo">1951</span> throws IOException, InterruptedException {<a name="line.1951"></a> +<span class="sourceLineNo">1952</span><a name="line.1952"></a> +<span class="sourceLineNo">1953</span> List<WorkItemRegion> workItems = new ArrayList<>(regionServerList.size());<a name="line.1953"></a> +<span class="sourceLineNo">1954</span> List<Future<Void>> workFutures;<a name="line.1954"></a> <span class="sourceLineNo">1955</span><a name="line.1955"></a> -<span class="sourceLineNo">1956</span> workFutures = executor.invokeAll(workItems);<a name="line.1956"></a> -<span class="sourceLineNo">1957</span><a name="line.1957"></a> -<span class="sourceLineNo">1958</span> for(int i=0; i<workFutures.size(); i++) {<a name="line.1958"></a> -<span class="sourceLineNo">1959</span> WorkItemRegion item = workItems.get(i);<a name="line.1959"></a> -<span class="sourceLineNo">1960</span> Future<Void> f = workFutures.get(i);<a name="line.1960"></a> -<span class="sourceLineNo">1961</span> try {<a name="line.1961"></a> -<span class="sourceLineNo">1962</span> f.get();<a name="line.1962"></a> -<span class="sourceLineNo">1963</span> } catch(ExecutionException e) {<a name="line.1963"></a> -<span class="sourceLineNo">1964</span> LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),<a name="line.1964"></a> -<span class="sourceLineNo">1965</span> e.getCause());<a name="line.1965"></a> -<span class="sourceLineNo">1966</span> }<a name="line.1966"></a> -<span class="sourceLineNo">1967</span> }<a name="line.1967"></a> -<span class="sourceLineNo">1968</span> }<a name="line.1968"></a> -<span class="sourceLineNo">1969</span><a name="line.1969"></a> -<span class="sourceLineNo">1970</span> /**<a name="line.1970"></a> -<span class="sourceLineNo">1971</span> * Check consistency of all regions that have been found in previous phases.<a name="line.1971"></a> -<span class="sourceLineNo">1972</span> */<a name="line.1972"></a> -<span class="sourceLineNo">1973</span> private void checkAndFixConsistency()<a name="line.1973"></a> -<span class="sourceLineNo">1974</span> throws IOException, KeeperException, InterruptedException {<a name="line.1974"></a> -<span class="sourceLineNo">1975</span> // Divide the checks in two phases. One for default/primary replicas and another<a name="line.1975"></a> -<span class="sourceLineNo">1976</span> // for the non-primary ones. Keeps code cleaner this way.<a name="line.1976"></a> -<span class="sourceLineNo">1977</span><a name="line.1977"></a> -<span class="sourceLineNo">1978</span> List<CheckRegionConsistencyWorkItem> workItems = new ArrayList<>(regionInfoMap.size());<a name="line.1978"></a> -<span class="sourceLineNo">1979</span> for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {<a name="line.1979"></a> -<span class="sourceLineNo">1980</span> if (e.getValue().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {<a name="line.1980"></a> -<span class="sourceLineNo">1981</span> workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));<a name="line.1981"></a> -<span class="sourceLineNo">1982</span> }<a name="line.1982"></a> -<span class="sourceLineNo">1983</span> }<a name="line.1983"></a> -<span class="sourceLineNo">1984</span> checkRegionConsistencyConcurrently(workItems);<a name="line.1984"></a> -<span class="sourceLineNo">1985</span><a name="line.1985"></a> -<span class="sourceLineNo">1986</span> boolean prevHdfsCheck = shouldCheckHdfs();<a name="line.1986"></a> -<span class="sourceLineNo">1987</span> setCheckHdfs(false); //replicas don't have any hdfs data<a name="line.1987"></a> -<span class="sourceLineNo">1988</span> // Run a pass over the replicas and fix any assignment issues that exist on the currently<a name="line.1988"></a> -<span class="sourceLineNo">1989</span> // deployed/undeployed replicas.<a name="line.1989"></a> -<span class="sourceLineNo">1990</span> List<CheckRegionConsistencyWorkItem> replicaWorkItems = new ArrayList<>(regionInfoMap.size());<a name="line.1990"></a> -<span class="sourceLineNo">1991</span> for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {<a name="line.1991"></a> -<span class="sourceLineNo">1992</span> if (e.getValue().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {<a name="line.1992"></a> -<span class="sourceLineNo">1993</span> replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));<a name="line.1993"></a> -<span class="sourceLineNo">1994</span> }<a name="line.1994"></a> -<span class="sourceLineNo">1995</span> }<a name="line.1995"></a> -<span class="sourceLineNo">1996</span> checkRegionConsistencyConcurrently(replicaWorkItems);<a name="line.1996"></a> -<span class="sourceLineNo">1997</span> setCheckHdfs(prevHdfsCheck);<a name="line.1997"></a> -<span class="sourceLineNo">1998</span><a name="line.1998"></a> -<span class="sourceLineNo">1999</span> // If some regions is skipped during checkRegionConsistencyConcurrently() phase, we might<a name="line.1999"></a> -<span class="sourceLineNo">2000</span> // not get accurate state of the hbase if continuing. The config here allows users to tune<a name="line.2000"></a> -<span class="sourceLineNo">2001</span> // the tolerance of number of skipped region.<a name="line.2001"></a> -<span class="sourceLineNo">2002</span> // TODO: evaluate the consequence to continue the hbck operation without config.<a name="line.2002"></a> -<span class="sourceLineNo">2003</span> int terminateThreshold = getConf().getInt("hbase.hbck.skipped.regions.limit", 0);<a name="line.2003"></a> -<span class="sourceLineNo">2004</span> int numOfSkippedRegions = skippedRegions.size();<a name="line.2004"></a> -<span class="sourceLineNo">2005</span> if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {<a name="line.2005"></a> -<span class="sourceLineNo">2006</span> throw new IOException(numOfSkippedRegions<a name="line.2006"></a> -<span class="sourceLineNo">2007</span> + " region(s) could not be checked or repaired. See logs for detail.");<a name="line.2007"></a> -<span class="sourceLineNo">2008</span> }<a name="line.2008"></a> -<span class="sourceLineNo">2009</span><a name="line.2009"></a> -<span class="sourceLineNo">2010</span> if (shouldCheckHdfs()) {<a name="line.2010"></a> -<span class="sourceLineNo">2011</span> checkAndFixTableStates();<a name="line.2011"></a> -<span class="sourceLineNo">2012</span> }<a name="line.2012"></a> -<span class="sourceLineNo">2013</span> }<a name="line.2013"></a> +<span class="sourceLineNo">1956</span> // loop to contact each region server in parallel<a name="line.1956"></a> +<span class="sourceLineNo">1957</span> for (ServerName rsinfo: regionServerList) {<a name="line.1957"></a> +<span class="sourceLineNo">1958</span> workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));<a name="line.1958"></a> +<span class="sourceLineNo">1959</span> }<a name="line.1959"></a> +<span class="sourceLineNo">1960</span><a name="line.1960"></a> +<span class="sourceLineNo">1961</span> workFutures = executor.invokeAll(workItems);<a name="line.1961"></a> +<span class="sourceLineNo">1962</span><a name="line.1962"></a> +<span class="sourceLineNo">1963</span> for(int i=0; i<workFutures.size(); i++) {<a name="line.1963"></a> +<span class="sourceLineNo">1964</span> WorkItemRegion item = workItems.get(i);<a name="line.1964"></a> +<span class="sourceLineNo">1965</span> Future<Void> f = workFutures.get(i);<a name="line.1965"></a> +<span class="sourceLineNo">1966</span> try {<a name="line.1966"></a> +<span class="sourceLineNo">1967</span> f.get();<a name="line.1967"></a> +<span class="sourceLineNo">1968</span> } catch(ExecutionException e) {<a name="line.1968"></a> +<span class="sourceLineNo">1969</span> LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),<a name="line.1969"></a> +<span class="sourceLineNo">1970</span> e.getCause());<a name="line.1970"></a> +<span class="sourceLineNo">1971</span> }<a name="line.1971"></a> +<span class="sourceLineNo">1972</span> }<a name="line.1972"></a> +<span class="sourceLineNo">1973</span> }<a name="line.1973"></a> +<span class="sourceLineNo">1974</span><a name="line.1974"></a> +<span class="sourceLineNo">1975</span> /**<a name="line.1975"></a> +<span class="sourceLineNo">1976</span> * Check consistency of all regions that have been found in previous phases.<a name="line.1976"></a> +<span class="sourceLineNo">1977</span> */<a name="line.1977"></a> +<span class="sourceLineNo">1978</span> private void checkAndFixConsistency()<a name="line.1978"></a> +<span class="sourceLineNo">1979</span> throws IOException, KeeperException, InterruptedException {<a name="line.1979"></a> +<span class="sourceLineNo">1980</span> // Divide the checks in two phases. One for default/primary replicas and another<a name="line.1980"></a> +<span class="sourceLineNo">1981</span> // for the non-primary ones. Keeps code cleaner this way.<a name="line.1981"></a> +<span class="sourceLineNo">1982</span><a name="line.1982"></a> +<span class="sourceLineNo">1983</span> List<CheckRegionConsistencyWorkItem> workItems = new ArrayList<>(regionInfoMap.size());<a name="line.1983"></a> +<span class="sourceLineNo">1984</span> for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {<a name="line.1984"></a> +<span class="sourceLineNo">1985</span> if (e.getValue().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {<a name="line.1985"></a> +<span class="sourceLineNo">1986</span> workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));<a name="line.1986"></a> +<span class="sourceLineNo">1987</span> }<a name="line.1987"></a> +<span class="sourceLineNo">1988</span> }<a name="line.1988"></a> +<span class="sourceLineNo">1989</span> checkRegionConsistencyConcurrently(workItems);<a name="line.1989"></a> +<span class="sourceLineNo">1990</span><a name="line.1990"></a> +<span class="sourceLineNo">1991</span> boolean prevHdfsCheck = shouldCheckHdfs();<a name="line.1991"></a> +<span class="sourceLineNo">1992</span> setCheckHdfs(false); //replicas don't have any hdfs data<a name="line.1992"></a> +<span class="sourceLineNo">1993</span> // Run a pass over the replicas and fix any assignment issues that exist on the currently<a name="line.1993"></a> +<span class="sourceLineNo">1994</span> // deployed/undeployed replicas.<a name="line.1994"></a> +<span class="sourceLineNo">1995</span> List<CheckRegionConsistencyWorkItem> replicaWorkItems = new ArrayList<>(regionInfoMap.size());<a name="line.1995"></a> +<span class="sourceLineNo">1996</span> for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {<a name="line.1996"></a> +<span class="sourceLineNo">1997</span> if (e.getValue().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {<a name="line.1997"></a> +<span class="sourceLineNo">1998</span> replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));<a name="line.1998"></a> +<span class="sourceLineNo">1999</span> }<a name="line.1999"></a> +<span class="sourceLineNo">2000</span> }<a name="line.2000"></a> +<span class="sourceLineNo">2001</span> checkRegionConsistencyConcurrently(replicaWorkItems);<a name="line.2001"></a> +<span class="sourceLineNo">2002</span> setCheckHdfs(prevHdfsCheck);<a name="line.2002"></a> +<span class="sourceLineNo">2003</span><a name="line.2003"></a> +<span class="sourceLineNo">2004</span> // If some regions is skipped during checkRegionConsistencyConcurrently() phase, we might<a name="line.2004"></a> +<span class="sourceLineNo">2005</span> // not get accurate state of the hbase if continuing. The config here allows users to tune<a name="line.2005"></a> +<span class="sourceLineNo">2006</span> // the tolerance of number of skipped region.<a name="line.2006"></a> +<span class="sourceLineNo">2007</span> // TODO: evaluate the consequence to continue the hbck operation without config.<a name="line.2007"></a> +<span class="sourceLineNo">2008</span> int terminateThreshold = getConf().getInt("hbase.hbck.skipped.regions.limit", 0);<a name="line.2008"></a> +<span class="sourceLineNo">2009</span> int numOfSkippedRegions = skippedRegions.size();<a name="line.2009"></a> +<span class="sourceLineNo">2010</span> if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {<a name="line.2010"></a> +<span class="sourceLineNo">2011</span> throw new IOException(numOfSkippedRegions<a name="line.2011"></a> +<span class="sourceLineNo">2012</span> + " region(s) could not be checked or repaired. See logs for detail.");<a name="line.2012"></a> +<span class="sourceLineNo">2013</span> }<a name="line.2013"></a> <span class="sourceLineNo">2014</span><a name="line.2014"></a> -<span class="sourceLineNo">2015</span> /**<a name="line.2015"></a> -<span class="sourceLineNo">2016</span> * Check consistency of all regions using mulitple threads concurrently.<a name="line.2016"></a> -<span class="sourceLineNo">2017</span> */<a name="line.2017"></a> -<span class="sourceLineNo">2018</span> private void checkRegionConsistencyConcurrently(<a name="line.2018"></a> -<span class="sourceLineNo">2019</span> final List<CheckRegionConsistencyWorkItem> workItems)<a name="line.2019"></a> -<span class="sourceLineNo">2020</span> throws IOException, KeeperException, InterruptedException {<a name="line.2020"></a> -<span class="sourceLineNo">2021</span> if (workItems.isEmpty()) {<a name="line.2021"></a> -<span class="sourceLineNo">2022</span> return; // nothing to check<a name="line.2022"></a> -<span class="sourceLineNo">2023</span> }<a name="line.2023"></a> -<span class="sourceLineNo">2024</span><a name="line.2024"></a> -<span class="sourceLineNo">2025</span> List<Future<Void>> workFutures = executor.invokeAll(workItems);<a name="line.2025"></a> -<span class="sourceLineNo">2026</span> for(Future<Void> f: workFutures) {<a name="line.2026"></a> -<span class="sourceLineNo">2027</span> try {<a name="line.2027"></a> -<span class="sourceLineNo">2028</span> f.get();<a name="line.2028"></a> -<span class="sourceLineNo">2029</span> } catch(ExecutionException e1) {<a name="line.2029"></a> -<span class="sourceLineNo">2030</span> LOG.warn("Could not check region consistency " , e1.getCause());<a name="line.2030"></a> -<span class="sourceLineNo">2031</span> if (e1.getCause() instanceof IOException) {<a name="line.2031"></a> -<span class="sourceLineNo">2032</span> throw (IOException)e1.getCause();<a name="line.2032"></a> -<span class="sourceLineNo">2033</span> } else if (e1.getCause() instanceof KeeperException) {<a name="line.2033"></a> -<span class="sourceLineNo">2034</span> throw (KeeperException)e1.getCause();<a name="line.2034"></a> -<span class="sourceLineNo">2035</span> } else if (e1.getCause() instanceof InterruptedException) {<a name="line.2035"></a> -<span class="sourceLineNo">2036</span> throw (InterruptedException)e1.getCause();<a name="line.2036"></a> -<span class="sourceLineNo">2037</span> } else {<a name="line.2037"></a> -<span class="sourceLineNo">2038</span> throw new IOException(e1.getCause());<a name="line.2038"></a> -<span class="sourceLineNo">2039</span> }<a name="line.2039"></a> -<span class="sourceLineNo">2040</span> }<a name="line.2040"></a> -<span class="sourceLineNo">2041</span> }<a name="line.2041"></a> -<span class="sourceLineNo">2042</span> }<a name="line.2042"></a> -<span class="sourceLineNo">2043</span><a name="line.2043"></a> -<span class="sourceLineNo">2044</span> class CheckRegionConsistencyWorkItem implements Callable<Void> {<a name="line.2044"></a> -<span class="sourceLineNo">2045</span> private final String key;<a name="line.2045"></a> -<span class="sourceLineNo">2046</span> private final HbckInfo hbi;<a name="line.2046"></a> -<span class="sourceLineNo">2047</span><a name="line.2047"></a> -<span class="sourceLineNo">2048</span> CheckRegionConsistencyWorkItem(String key, HbckInfo hbi) {<a name="line.2048"></a> -<span class="sourceLineNo">2049</span> this.key = key;<a name="line.2049"></a> -<span class="sourceLineNo">2050</span> this.hbi = hbi;<a name="line.2050"></a> -<span class="sourceLineNo">2051</span> }<a name="line.2051"></a> +<span class="sourceLineNo">2015</span> if (shouldCheckHdfs()) {<a name="line.2015"></a> +<span class="sourceLineNo">2016</span> checkAndFixTableStates();<a name="line.2016"></a> +<span class="sourceLineNo">2017</span> }<a name="line.2017"></a> +<span class="sourceLineNo">2018</span> }<a name="line.2018"></a> +<span class="sourceLineNo">2019</span><a name="line.2019"></a> +<span class="sourceLineNo">2020</span> /**<a name="line.2020"></a> +<span class="sourceLineNo">2021</span> * Check consistency of all regions using mulitple threads concurrently.<a name="line.2021"></a> +<span class="sourceLineNo">2022</span> */<a name="line.2022"></a> +<span class="sourceLineNo">2023</span> private void checkRegionConsistencyConcurrently(<a name="line.2023"></a> +<span class="sourceLineNo">2024</span> final List<CheckRegionConsistencyWorkItem> workItems)<a name="line.2024"></a> +<span class="sourceLineNo">2025</span> throws IOException, KeeperException, InterruptedException {<a name="line.2025"></a> +<span class="sourceLineNo">2026</span> if (workItems.isEmpty()) {<a name="line.2026"></a> +<span class="sourceLineNo">2027</span> return; // nothing to check<a name="line.2027"></a> +<span class="sourceLineNo">2028</span> }<a name="line.2028"></a> +<span class="sourceLineNo">2029</span><a name="line.2029"></a> +<span class="sourceLineNo">2030</span> List<Future<Void>> workFutures = executor.invokeAll(workItems);<a name="line.2030"></a> +<span class="sourceLineNo">2031</span> for(Future<Void> f: workFutures) {<a name="line.2031"></a> +<span class="sourceLineNo">2032</span> try {<a name="line.2032"></a> +<span class="sourceLineNo">2033</span> f.get();<a name="line.2033"></a> +<span class="sourceLineNo">2034</span> } catch(ExecutionException e1) {<a name="line.2034"></a> +<span class="sourceLineNo">2035</span> LOG.warn("Could not check region consistency " , e1.getCause());<a name="line.2035"></a> +<span class="sourceLineNo">2036</span> if (e1.getCause() instanceof IOException) {<a name="line.2036"></a> +<span class="sourceLineNo">2037</span> throw (IOException)e1.getCause();<a name="line.2037"></a> +<span class="sourceLineNo">2038</span> } else if (e1.getCause() instanceof KeeperException) {<a name="line.2038"></a> +<span class="sourceLineNo">2039</span> throw (KeeperException)e1.getCause();<a name="line.2039"></a> +<span class="sourceLineNo">2040</span> } else if (e1.getCause() instanceof InterruptedException) {<a name="line.2040"></a> +<span class="sourceLineNo">2041</span> throw (InterruptedException)e1.getCause();<a name="line.2041"></a> +<span class="sourceLineNo">2042</span> } else {<a name="line.2042"></a> +<span class="sourceLineNo">2043</span> throw new IOException(e1.getCause());<a name="line.2043"></a> +<span class="sourceLineNo">2044</span> }<a name="line.2044"></a> +<span class="sourceLineNo">2045</span> }<a name="line.2045"></a> +<span class="sourceLineNo">2046</span> }<a name="line.2046"></a> +<span class="sourceLineNo">2047</span> }<a name="line.2047"></a> +<span class="sourceLineNo">2048</span><a name="line.2048"></a> +<span class="sourceLineNo">2049</span> class CheckRegionConsistencyWorkItem implements Callable<Void> {<a name="line.2049"></a> +<span class="sourceLineNo">2050</span> private final String key;<a name="line.2050"></a> +<span class="sourceLineNo">2051</span> private final HbckInfo hbi;<a name="line.2051"></a> <span class="sourceLineNo">2052</span><a name="line.2052"></a> -<span class="sourceLineNo">2053</span> @Override<a name="line.2053"></a> -<span class="sourceLineNo">2054</span> public synchronized Void call() throws Exception {<a name="line.2054"></a> -<span class="sourceLineNo">2055</span> try {<a name="line.2055"></a> -<span class="sourceLineNo">2056</span> checkRegionConsistency(key, hbi);<a name="line.2056"></a> -<span class="sourceLineNo">2057</span> } catch (Exception e) {<a name="line.2057"></a> -<span class="sourceLineNo">2058</span> // If the region is non-META region, skip this region and send warning/error message; if<a name="line.2058"></a> -<span class="sourceLineNo">2059</span> // the region is META region, we should not continue.<a name="line.2059"></a> -<span class="sourceLineNo">2060</span> LOG.warn("Unable to complete check or repair the region '" + hbi.getRegionNameAsString()<a name="line.2060"></a> -<span class="sourceLineNo">2061</span> + "'.", e);<a name="line.2061"></a> -<span class="sourceLineNo">2062</span> if (hbi.getHdfsHRI().isMetaRegion()) {<a name="line.2062"></a> -<span class="sourceLineNo">2063</span> throw e;<a name="line.2063"></a> -<span class="sourceLineNo">2064</span> }<a name="line.2064"></a> -<span class="sourceLineNo">2065</span> LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");<a name="line.2065"></a> -<span class="sourceLineNo">2066</span> addSkippedRegion(hbi);<a name="line.2066"></a> -<span class="sourceLineNo">2067</span> }<a name="line.2067"></a> -<span class="sourceLineNo">2068</span> return null;<a name="line.2068"></a> -<span class="sourceLineNo">2069</span> }<a name="line.2069"></a> -<span class="sourceLineNo">2070</span> }<a name="line.2070"></a> -<span class="sourceLineNo">2071</span><a name="line.2071"></a> -<span class="sourceLineNo">2072</span> private void addSkippedRegion(final HbckInfo hbi) {<a name="line.2072"></a> -<span class="sourceLineNo">2073</span> Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());<a name="line.2073"></a> -<span class="sourceLineNo">2074</span> if (skippedRegionNames == null) {<a name="line.2074"></a> -<span class="sourceLineNo">2075</span> skippedRegionNames = new HashSet<>();<a name="line.2075"></a> -<span class="sourceLineNo">2076</span> }<a name="line.2076"></a> -<span class="sourceLineNo">2077</span> skippedRegionNames.add(hbi.getRegionNameAsString());<a name="line.2077"></a> -<span class="sourceLineNo">2078</span> skippedRegions.put(hbi.getTableName(), skippedRegionNames);<a name="line.2078"></a> -<span class="sourceLineNo">2079</span> }<a name="line.2079"></a> -<span class="sourceLineNo">2080</span><a name="line.2080"></a> -<span class="sourceLineNo">2081</span> /**<a name="line.2081"></a> -<span class="sourceLineNo">2082</span> * Check and fix table states, assumes full info available:<a name="line.2082"></a> -<span class="sourceLineNo">2083</span> * - tableInfos<a name="line.2083"></a> -<span class="sourceLineNo">2084</span> * - empty tables loaded<a name="line.2084"></a> -<span class="sourceLineNo">2085</span> */<a name="line.2085"></a> -<span class="sourceLineNo">2086</span> private void checkAndFixTableStates() throws IOException {<a name="line.2086"></a> -<span class="sourceLineNo">2087</span> // first check dangling states<a name="line.2087"></a> -<span class="sourceLineNo">2088</span> for (Entry<TableName, TableState> entry : tableStates.entrySet()) {<a name="line.2088"></a> -<span class="sourceLineNo">2089</span> TableName tableName = entry.getKey();<a name="line.2089"></a> -<span class="sourceLineNo">2090</span> TableState tableState = entry.getValue();<a name="line.2090"></a> -<span class="sourceLineNo">2091</span> TableInfo tableInfo = tablesInfo.get(tableName);<a name="line.2091"></a> -<span class="sourceLineNo">2092</span> if (isTableIncluded(tableName)<a name="line.2092"></a> -<span class="sourceLineNo">2093</span> && !tableName.isSystemTable()<a name="line.2093"></a> -<span class="sourceLineNo">2094</span> && tableInfo == null) {<a name="line.2094"></a> -<span class="sourceLineNo">2095</span> if (fixMeta) {<a name="line.2095"></a> -<span class="sourceLineNo">2096</span> MetaTableAccessor.deleteTableState(connection, tableName);<a name="line.2096"></a> -<span class="sourceLineNo">2097</span> TableState state = MetaTableAccessor.getTableState(connection, tableName);<a name="line.2097"></a> -<span class="sourceLineNo">2098</span> if (state != null) {<a name="line.2098"></a> -<span class="sourceLineNo">2099</span> errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,<a name="line.2099"></a> -<span class="sourceLineNo">2100</span> tableName + " unable to delete dangling table state " + tableState);<a name="line.2100"></a> -<span class="sourceLineNo">2101</span> }<a name="line.2101"></a> -<span class="sourceLineNo">2102</span> } else {<a name="line.2102"></a> -<span class="sourceLineNo">2103</span> errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,<a name="line.2103"></a> -<span class="sourceLineNo">2104</span> tableName + " has dangling table state " + tableState);<a name="line.2104"></a> -<span class="sourceLineNo">2105</span> }<a name="line.2105"></a> -<span class="sourceLineNo">2106</span> }<a name="line.2106"></a> -<span class="sourceLineNo">2107</span> }<a name="line.2107"></a> -<span class="sourceLineNo">2108</span> // check that all tables have states<a name="line.2108"></a> -<span class="sourceLineNo">2109</span> for (TableName tableName : tablesInfo.keySet()) {<a name="line.2109"></a> -<span class="sourceLineNo">2110</span> if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) {<a name="line.2110"></a> -<span class="sourceLineNo">2111</span> if (fixMeta) {<a name="line.2111"></a> -<span class="sourceLineNo">2112</span> MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);<a name="line.2112"></a> -<span class="sourceLineNo">2113</span> TableState newState = MetaTableAccessor.getTableState(connection, tableName);<a name="line.2113"></a> -<span class="sourceLineNo">2114</span> if (newState == null) {<a name="line.2114"></a> -<span class="sourceLineNo">2115</span> errors.reportError(ERROR_CODE.NO_TABLE_STATE,<a name="line.2115"></a> -<span class="sourceLineNo">2116</span> "Unable to change state for table " + tableName + " in meta ");<a name="line.2116"></a> -<span class="sourceLineNo">2117</span> }<a name="line.2117"></a> -<span class="sourceLineNo">2118</span> } else {<a name="line.2118"></a> -<span class="sourceLineNo">2119</span> errors.reportError(ERROR_CODE.NO_TABLE_STATE,<a name="line.2119"></a> -<span class="sourceLineNo">2120</span> tableName + " has no state in meta ");<a name="line.2120"></a> -<span class="sourceLineNo">2121</span> }<a name="line.2121"></a> -<span class="sourceLineNo">2122</span> }<a name="line.2122"></a> -<span class="sourceLineNo">2123</span> }<a name="line.2123"></a> -<span class="sourceLineNo">2124</span> }<a name="line.2124"></a> -<span class="sourceLineNo">2125</span><a name="line.2125"></a> -<span class="sourceLineNo">2126</span> private void preCheckPermission() throws IOException, AccessDeniedException {<a name="line.2126"></a> -<span class="sourceLineNo">2127</span> if (shouldIgnorePreCheckPermission()) {<a name="line.2127"></a> -<span class="sourceLineNo">2128</span> return;<a name="line.2128"></a> -<span class="sourceLineNo">2129</span> }<a name="line.2129"></a> +<span class="sourceLineNo">2053</span> CheckRegionConsistencyWorkItem(String key, HbckInfo hbi) {<a name="line.2053"></a> +<span class="sourceLineNo">2054</span> this.key = key;<a name="line.2054"></a> +<span class="sourceLineNo">2055</span> this.hbi = hbi;<a name="line.2055"></a> +<span class="sourceLineNo">2056</span> }<a name="line.2056"></a> +<span class="sourceLineNo">2057</span><a name="line.2057"></a> +<span class="sourceLineNo">2058</span> @Override<a name="line.2058"></a> +<span class="sourceLineNo">2059</span> public synchronized Void call() throws Exception {<a name="line.2059"></a> +<span class="sourceLineNo">2060</span> try {<a name="line.2060"></a> +<span class="sourceLineNo">2061</span> checkRegionConsistency(key, hbi);<a name="line.2061"></a> +<span class="sourceLineNo">2062</span> } catch (Exception e) {<a name="line.2062"></a> +<span class="sourceLineNo">2063</span> // If the region is non-META region, skip this region and send warning/error message; if<a name="line.2063"></a> +<span class="sourceLineNo">2064</span> // the region is META region, we should not continue.<a name="line.2064"></a> +<span class="sourceLineNo">2065</span> LOG.warn("Unable to complete check or repair the region '" + hbi.getRegionNameAsString()<a name="line.2065"></a> +<span class="sourceLineNo">2066</span> + "'.", e);<a name="line.2066"></a> +<span class="sourceLineNo">2067</span> if (hbi.getHdfsHRI().isMetaRegion()) {<a name="line.2067"></a> +<span class="sourceLineNo">2068</span> throw e;<a name="line.2068"></a> +<span class="sourceLineNo">2069</span> }<a name="line.2069"></a> +<span class="sourceLineNo">2070</span> LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");<a name="line.2070"></a> +<span class="sourceLineNo">2071</span> addSkippedRegion(hbi);<a name="line.2071"></a> +<span class="sourceLineNo">2072</span> }<a name="line.2072"></a> +<span class="sourceLineNo">2073</span> return null;<a name="line.2073"></a> +<span class="sourceLineNo">2074</span> }<a name="line.2074"></a> +<span class="sourceLineNo">2075</span> }<a name="line.2075"></a> +<span class="sourceLineNo">2076</span><a name="line.2076"></a> +<span class="sourceLineNo">2077</span> private void addSkippedRegion(final HbckInfo hbi) {<a name="line.2077"></a> +<span class="sourceLineNo">2078</span> Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());<a name="line.2078"></a> +<span class="sourceLineNo">2079</span> if (skippedRegionNames == null) {<a name="line.2079"></a> +<span class="sourceLineNo">2080</span> skippedRegionNames = new HashSet<>();<a name="line.2080"></a> +<span class="sourceLineNo">2081</span> }<a name="line.2081"></a> +<span class="sourceLineNo">2082</span> skippedRegionNames.add(hbi.getRegionNameAsString());<a name="line.2082"></a> +<span class="sourceLineNo">2083</span> skippedRegions.put(hbi.getTableName(), skippedRegionNames);<a name="line.2083"></a> +<span class="sourceLineNo">2084</span> }<a name="line.2084"></a> +<span class="sourceLineNo">2085</span><a name="line.2085"></a> +<span class="sourceLineNo">2086</span> /**<a name="line.2086"></a> +<span class="sourceLineNo">2087</span> * Check and fix table states, assumes full info available:<a name="line.2087"></a> +<span class="sourceLineNo">2088</span> * - tableInfos<a name="line.2088"></a> +<span class="sourceLineNo">2089</span> * - empty tables loaded<a name="line.2089"></a> +<span class="sourceLineNo">2090</span> */<a name="line.2090"></a> +<span class="sourceLineNo">2091</span> private void checkAndFixTableStates() throws IOException {<a name="line.2091"></a> +<span class="sourceLineNo">2092</span> // first check dangling states<a name="line.2092"></a> +<span class="sourceLineNo">2093</span> for (Entry<TableName, TableState> entry : tableStates.entrySet()) {<a name="line.2093"></a> +<span class="sourceLineNo">2094</span> TableName tableName = entry.getKey();<a name="line.2094"></a> +<span class="sourceLineNo">2095</span> TableState tableState = entry.getValue();<a name="line.2095"></a> +<span class="sourceLineNo">2096</span> TableInfo tableInfo = tablesInfo.get(tableName);<a name="line.2096"></a> +<span class="sourceLineNo">2097</span> if (isTableIncluded(tableName)<a name="line.2097"></a> +<span class="sourceLineNo">2098</span> && !tableName.isSystemTable()<a name="line.2098"></a> +<span class="sourceLineNo">2099</span> && tableInfo == null) {<a name="line.2099"></a> +<span class="sourceLineNo">2100</span> if (fixMeta) {<a name="line.2100"></a> +<span class="sourceLineNo">2101</span> MetaTableAccessor.deleteTableState(connection, tableName);<a name="line.2101"></a> +<span class="sourceLineNo">2102</span> TableState state = MetaTableAccessor.getTableState(connection, tableName);<a name="line.2102"></a> +<span class="sourceLineNo">2103</span> if (state != null) {<a name="line.2103"></a> +<span class="sourceLineNo">2104</span> errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,<a name="line.2104"></a> +<span class="sourceLineNo">2105</span> tableName + " unable to delete dangling table state " + tableState);<a name="line.2105"></a> +<span class="sourceLineNo">2106</span> }<a name="line.2106"></a> +<span class="sourceLineNo">2107</span> } else {<a name="line.2107"></a> +<span class="sourceLineNo">2108</span> errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,<a name="line.2108"></a> +<span class="sourceLineNo">2109</span> tableName + " has dangling table state " + tableState);<a name="line.2109"></a> +<span class="sourceLineNo">2110</span> }<a name="line.2110"></a> +<span class="sourceLineNo">2111</span> }<a name="line.2111"></a> +<span class="sourceLineNo">2112</span> }<a name="line.2112"></a> +<span class="sourceLineNo">2113</span> // check that all tables have states<a name="line.2113"></a> +<span class="sourceLineNo">2114</span> for (TableName tableName : tablesInfo.keySet()) {<a name="line.2114"></a> +<span class="sourceLineNo">2115</span> if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) {<a name="line.2115"></a> +<span class="sourceLineNo">2116</span> if (fixMeta) {<a name="line.2116"></a> +<span class="sourceLineNo">2117</span> MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);<a name="line.2117"></a> +<span class="sourceLineNo">2118</span> TableState newState = MetaTableAccessor.getTableState(connection, tableName);<a name="line.2118"></a> +<span class="sourceLineNo">2119</span> if (newState == null) {<a name="line.2119"></a> +<span class="sourceLineNo">2120</span> errors.reportError(ERROR_CODE.NO_TABLE_STATE,<a name="line.2120"></a> +<span class="sourceLineNo">2121</span> "Unable to change state for table " + tableName + " in meta ");<a name="line.2121"></a> +<span class="sourceLineNo">2122</span> }<a name="line.2122"></a> +<span class="sourceLineNo">2123</span> } else {<a name="line.2123"></a> +<span class="sourceLineNo">2124</span> errors.reportError(ERROR_CODE.NO_TABLE_STATE,<a name="line.2124"></a> +<span class="sourceLineNo">2125</span> tableName + " has no state in meta ");<a name="line.2125"></a> +<span class="sourceLineNo">2126</span> }<a name="line.2126"></a> +<span class="sourceLineNo">2127</span> }<a name="line.2127"></a> +<span class="sourceLineNo">2128</span> }<a name="line.2128"></a> +<span class="sourceLineNo">2129</span> }<a name="line.2129"></a> <span class="sourceLineNo">2130</span><a name="line.2130"></a> -<span class="sourceLineNo">2131</span> Path hbaseDir = FSUtils.getRootDir(getConf());<a name="line.2131"></a> -<span class="sourceLineNo">2132</span> FileSystem fs = hbaseDir.getFileSystem(getConf());<a name="line.2132"></a> -<span class="sourceLineNo">2133</span> UserProvider userProvider = UserProvider.instantiate(getConf());<a name="line.2133"></a> -<span class="sourceLineNo">2134</span> UserGroupInformation ugi = userProvider.getCurrent().getUGI();<a name="line.2134"></a> -<span class="sourceLineNo">2135</span> FileStatus[] files = fs.listStatus(hbaseDir);<a name="line.2135"></a> -<span class="sourceLineNo">2136</span> for (FileStatus file : files) {<a name="line.2136"></a> -<span class="sourceLineNo">2137</span> try {<a name="line.2137"></a> -<span class="sourceLineNo">2138</span> FSUtils.checkAccess(ugi, file, FsAction.WRITE);<a name="line.2138"></a> -<span class="sourceLineNo">2139</span> } catch (AccessDeniedException ace) {<a name="line.2139"></a> -<span class="sourceLineNo">2140</span> LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);<a name="line.2140"></a> -<span class="sourceLineNo">2141</span> errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()<a name="line.2141"></a> -<span class="sourceLineNo">2142</span> + " does not have write perms to " + file.getPath()<a name="line.2142"></a> -<span class="sourceLineNo">2143</span> + ". Please rerun hbck as hdfs user " + file.getOwner());<a name="line.2143"></a> -<span class="sourceLineNo">2144</span> throw ace;<a name="line.2144"></a> -<span class="sourceLineNo">2145</span> }<a name="line.2145"></a> -<span class="sourceLineNo">2146</span> }<a name="line.2146"></a> -<span class="sourceLineNo">2147</span> }<a name="line.2147"></a> -<span class="sourceLineNo">2148</span><a name="line.2148"></a> -<span class="sourceLineNo">2149</span> /**<a name="line.2149"></a> -<span class="sourceLineNo">2150</span> * Deletes region from meta table<a name="line.2150"></a> -<span class="sourceLineNo">2151</span> */<a name="line.2151"></a> -<span class="sourceLineNo">2152</span> private void deleteMetaRegion(HbckInfo hi) throws IOException {<a name="line.2152"></a> -<span class="sourceLineNo">2153</span> deleteMetaRegion(hi.metaEntry.getRegionName());<a name="line.2153"></a> -<span class="sourceLineNo">2154</span> }<a name="line.2154"></a> -<span class="sourceLineNo">2155</span><a name="line.2155"></a> -<span class="sourceLineNo">2156</span> /**<a name="line.2156"></a> -<span class="sourceLineNo">2157</span> * Deletes region from meta table<a name="line.2157"></a> -<span class="sourceLineNo">2158</span> */<a name="line.2158"></a> -<span class="sourceLineNo">2159</span> private void deleteMetaRegion(byte[] metaKey) throws IOException {<a name="line.2159"></a> -<span class="sourceLineNo">2160</span> Delete d = new Delete(metaKey);<a name="line.2160"></a> -<span class="sourceLineNo">2161</span> meta.delete(d);<a name="line.2161"></a> -<span class="sourceLineNo">2162</span> LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );<a name="line.2162"></a> -<span class="sourceLineNo">2163</span> }<a name="line.2163"></a> -<span class="sourceLineNo">2164</span><a name="line.2164"></a> -<span class="sourceLineNo">2165</span> /**<a name="line.2165"></a> -<span class="sourceLineNo">2166</span> * Reset the split parent region info in meta table<a name="line.2166"></a> -<span class="sourceLineNo">2167</span> */<a name="line.2167"></a> -<span class="sourceLineNo">2168</span> private void resetSplitParent(HbckInfo hi) throws IOException {<a name="line.2168"></a> -<span class="sourceLineNo">2169</span> RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());<a name="line.2169"></a> -<span class="sourceLineNo">2170</span> Delete d = new Delete(hi.metaEntry.getRegionName());<a name="line.2170"></a> -<span class="sourceLineNo">2171</span> d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);<a name="line.2171"></a> -<span class="sourceLineNo">2172</span> d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);<a name="line.2172"></a> -<span class="sourceLineNo">2173</span> mutations.add(d);<a name="line.2173"></a> -<span class="sourceLineNo">2174</span><a name="line.2174"></a> -<span class="sourceLineNo">2175</span> RegionInfo hri = RegionInfoBuilder.newBuilder(hi.metaEntry)<a name="line.2175"></a> -<span class="sourceLineNo">2176</span> .setOffline(false)<a name="line.2176"></a> -<span class="sourceLineNo">2177</span> .setSplit(false)<a name="line.2177"></a> -<span class="sourceLineNo">2178</span> .build();<a name="line.2178"></a> -<span class="sourceLineNo">2179</span> Put p = MetaTableAccessor.makePutFromRegionInfo(hri);<a name="line.2179"></a> -<span class="sourceLineNo">2180</span> mutations.add(p);<a name="line.2180"></a> -<span class="sourceLineNo">2181</span><a name="line.2181"></a> -<span class="sourceLineNo">2182</span> meta.mutateRow(mutations);<a name="line.2182"></a> -<span class="sourceLineNo">2183</span> LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );<a name="line.2183"></a> -<span class="sourceLineNo">2184</span> }<a name="line.2184"></a> -<span class="sourceLineNo">2185</span><a name="line.2185"></a> -<span class="sourceLineNo">2186</span> /**<a name="line.2186"></a> -<span class="sourceLineNo">2187</span> * This backwards-compatibility wrapper for permanently offlining a region<a name="line.2187"></a> -<span class="sourceLineNo">2188</span> * that should not be alive. If the region server does not support the<a name="line.2188"></a> -<span class="sourceLineNo">2189</span> * "offline" method, it will use the closest unassign method instead. This<a name="line.2189"></a> -<span class="sourceLineNo">2190</span> * will basically work until one attempts to disable or delete the affected<a name="line.2190"></a> -<span class="sourceLineNo">2191</span> * table. The problem has to do with in-memory only master state, so<a name="line.2191"></a> -<span class="sourceLineNo">2192</span> * restarting the HMaster or failing over to another should fix this.<a name="line.2192"></a> -<span class="sourceLineNo">2193</span> */<a name="line.2193"></a> -<span class="sourceLineNo">2194</span> private void offline(byte[] regionName) throws IOException {<a name="line.2194"></a> -<span class="sourceLineNo">2195</span> String regionString = Bytes.toStringBinary(regionName);<a name="line.2195"></a> -<span class="sourceLineNo">2196</span> if (!rsSupportsOffline) {<a name="line.2196"></a> -<span class="sourceLineNo">2197</span> LOG.warn("Using unassign region " + regionString<a name="line.2197"></a> -<span class="sourceLineNo">2198</span> + " instead of using offline method, you should"<a name="line.2198"></a> -<span class="sourceLineNo">2199</span> + " restart HMaster after these repairs");<a name="line.2199"></a> -<span class="sourceLineNo">2200</span> admin.unassign(regionName, true);<a name="line.2200"></a> -<span class="sourceLineNo">2201</span> return;<a name="line.2201"></a> -<span class="sourceLineNo">2202</span> }<a name="line.2202"></a> -<span class="sourceLineNo">2203</span><a name="line.2203"></a> -<span class="sourceLineNo">2204</span> // first time we assume the rs's supports #offline.<a name="line.2204"></a> -<span class="sourceLineNo">2205</span> try {<a name="line.2205"></a> -<span class="sourceLineNo">2206</span> LOG.info("Offlining region " + regionString);<a name="line.2206"></a> -<span class="sourceLineNo">2207</span> admin.offline(regionName);<a name="line.2207"></a> -<span class="sourceLineNo">2208</span> } catch (IOException ioe) {<a name="line.2208"></a> -<span class="sourceLineNo">2209</span> String notFoundMsg = "java.lang.NoSuchMethodException: " +<a name="line.2209"></a> -<span class="sourceLineNo">2210</span> "org.apache.hadoop.hbase.master.HMaster.offline([B)";<a name="line.2210"></a> -<span class="sourceLineNo">2211</span> if (ioe.getMessage().contains(notFoundMsg)) {<a name="line.2211"></a> -<span class="sourceLineNo">2212</span> LOG.warn("Using unassign region " + regionString<a name="line.2212"></a> -<span class="sourceLineNo">2213</span> + " instead of using offline method, you should"<a name="line.2213"></a> -<span class="sourceLineNo">2214</span> + " restart HMaster after these repairs");<a name="line.2214"></a> -<span class="sourceLineNo">2215</span> rsSupportsOffline = false; // in the future just use unassign<a name="line.2215"></a> -<span class="sourceLineNo">2216</span> admin.unassign(regionName, true);<a name="line.2216"></a> -<span class="sourceLineNo">2217</span> return;<a name="line.2217"></a> -<span class="sourceLineNo">2218</span> }<a name="line.2218"></a> -<span class="sourceLineNo">2219</span> throw ioe;<a name="line.2219"></a> -<span class="sourceLineNo">2220</span> }<a name="line.2220"></a> -<span class="sourceLineNo">2221</span> }<a name="line.2221"></a> -<span class="sourceLineNo">2222</span><a name="line.2222"></a> -<span class="sourceLineNo">2223</span> private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {<a name="line.2223"></a> -<span class="sourceLineNo">2224</span> undeployRegionsForHbi(hi);<a name="line.2224"></a> -<span class="sourceLineNo">2225</span> // undeploy replicas of the region (but only if the method is invoked for the primary)<a name="line.2225"></a> -<span class="sourceLineNo">2226</span> if (hi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {<a name="line.2226"></a> -<span class="sourceLineNo">2227</span> return;<a name="line.2227"></a> -<span class="sourceLineNo">2228</span> }<a name="line.2228"></a> -<span class="sourceLineNo">2229</span> int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();<a name="line.2229"></a> -<span class="sourceLineNo">2230</span> for (int i = 1; i < numReplicas; i++) {<a name="line.2230"></a> -<span class="sourceLineNo">2231</span> if (hi.getPrimaryHRIForDeployedReplica() == null) continue;<a name="line.2231"></a> -<span class="sourceLineNo">2232</span> RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(<a name="line.2232"></a> -<span class="sourceLineNo">2233</span> hi.getPrimaryHRIForDeployedReplica(), i);<a name="line.2233"></a> -<span class="sourceLineNo">2234</span> HbckInfo h = regionInfoMap.get(hri.getEncodedName());<a name="line.2234"></a> -<span class="sourceLineNo">2235</span> if (h != null) {<a name="line.2235"></a> -<span class="sourceLineNo">2236</span> undeployRegionsForHbi(h);<a name="line.2236"></a> -<span class="sourceLineNo">2237</span> //set skip checks; we undeployed it, and we don't want to evaluate this anymore<a name="line.2237"></a> -<span class="sourceLineNo">2238</span> //in consistency checks<a name="line.2238"></a> -<span class="sourceLineNo">2239</span> h.setSkipChecks(true);<a name="line.2239"></a> -<span class="sourceLineNo">2240</span> }<a name="line.2240"></a> -<span class="sourceLineNo">2241</span> }<a name="line.2241"></a> -<span class="sourceLineNo">2242</span> }<a name="line.2242"></a> -<span class="sourceLineNo">2243</span><a name="line.2243"></a> -<span class="sourceLineNo">2244</span> private void undeployRegionsForHbi(HbckInfo hi) throws IOException, InterruptedException {<a name="line.2244"></a> -<span class="sourceLineNo">2245</span> for (OnlineEntry rse : hi.deployedEntries) {<a name="line.2245"></a> -<span class="sourceLineNo">2246</span> LOG.debug("Undeploy region " + rse.hri + " from " + rse.hsa);<a name="line.2246"></a> -<span class="sourceLineNo">2247</span> try {<a name="line.2247"></a> -<span class="sourceLineNo">2248</span> HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.hsa, rse.hri);<a name="line.2248"></a> -<span class="sourceLineNo">2249</span> offline(rse.hri.getRegionName());<a name="line.2249"></a> -<span class="sourceLineNo">2250</span> } catch (IOException ioe) {<a name="line.2250"></a> -<span class="sourceLineNo">2251</span> LOG.warn("Got exception when attempting to offline region "<a name="line.2251"></a> -<span class="sourceLineNo">2252</span> + Bytes.toString(rse.hri.getRegionName()), ioe);<a name="line.2252"></a> -<span class="sourceLineNo">2253</span> }<a name="line.2253"></a> -<span class="sourceLineNo">2254</span> }<a name="line.2254"></a> -<span class="sourceLineNo">2255</span> }<a name="line.2255"></a> -<span class="sourceLineNo">2256</span><a name="line.2256"></a> -<span class="sourceLineNo">2257</span> /**<a name="line.2257"></a> -<span class="sourceLineNo">2258</span> * Attempts to undeploy a region from a region server based in information in<a name="line.2258"></a> -<span class="sourceLineNo">2259</span> * META. Any operations that modify the file system should make sure that<a name="line.2259"></a> -<span class="sourceLineNo">2260</span> * its corresponding region is not deployed to prevent data races.<a name="line.2260"></a> -<span class="sourceLineNo">2261</span> *<a name="line.2261"></a> -<span class="sourceLineNo">2262</span> * A separate call is required to update the master in-memory region state<a name="line.2262"></a> -<span class="sourceLineNo">2263</span> * kept in the AssignementManager. Because disable uses this state instead of<a name="line.2263"></a> -<span class="sourceLineNo">2264</span> * that found in META, we can't seem to cleanly disable/delete tables that<a name="line.2264"></a> -<span class="sourceLineNo">2265</span> * have been hbck fixed. When used on a version of HBase that does not have<a name="line.2265"></a> -<span class="sourceLineNo">2266</span> * the offline ipc call exposed on the master (&lt;0.90.5, &lt;0.92.0) a master<a name="line.2266"></a> -<span class="sourceLineNo">2267</span> * restart or failover may be required.<a name="line.2267"></a> -<span class="sourceLineNo">2268</span> */<a name="line.2268"></a> -<span class="sourceLineNo">2269</span> private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {<a name="line.2269"></a> -<span class="sourceLineNo">2270</span> if (hi.metaEntry == null && hi.hdfsEntry == null) {<a name="line.2270"></a> -<span class="sourceLineNo">2271</span> undeployRegions(hi);<a name="line.2271"></a> -<span class="sourceLineNo">2272</span> return;<a name="line.2272"></a> -<span class="sourceLineNo">2273</span> }<a name="line.2273"></a> -<span class="sourceLineNo">2274</span><a name="line.2274"></a> -<span class="sourceLineNo">2275</span> // get assignment info and hregioninfo from meta.<a name="line.2275"></a> -<span class="sourceLineNo">2276</span> Get get = new Get(hi.getRegionName());<a name="line.2276"></a> -<span class="sourceLineNo">2277</span> get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);<a name="line.2277"></a> -<span class="sourceLineNo">2278</span> get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);<a name="line.2278"></a> -<span class="sourceLineNo">2279</span> get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);<a name="line.2279"></a> -<span class="sourceLineNo">2280</span> // also get the locations of the replicas to close if the primary region is being closed<a name="line.2280"></a> -<span class="sourceLineNo">2281</span> if (hi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {<a name="line.2281"></a> -<span class="sourceLineNo">2282</span> int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();<a name="line.2282"></a> -<span class="sourceLineNo">2283</span> for (int i = 0; i < numReplicas; i++) {<a name="line.2283"></a> -<span class="sourceLineNo">2284</span> get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));<a name="line.2284"></a> -<span class="sourceLineNo">2285</span> get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));<a name="line.2285"></a> -<span class="sourceLineNo">2286</span> }<a name="line.2286"></a> -<span class="sourceLineNo">2287</span> }<a name="line.2287"></a> -<span class="sourceLineNo">2288</span> Result r = meta.get(get);<a name="line.2288"></a> -<span class="sourceLineNo">2289</span> RegionLocations rl = MetaTableAccessor.getRegionLocations(r);<a name="line.2289"></a> -<span class="sourceLineNo">2290</span> if (rl == null) {<a name="line.2290"></a> -<span class="sourceLineNo">2291</span> LOG.warn("Unable to close region " + hi.getRegionNameAsString() +<a name="line.2291"></a> -<span class="sourceLineNo">2292</span> " since meta does not have handle to reach it");<a name="line.2292"></a> -<span class="sourceLineNo">2293</span> return;<a name="line.2293"></a> -<span class="sourceLineNo">2294</span> }<a name="line.2294"></a> -<span class="sourceLineNo">2295</span> for (HRegionLocation h : rl.getRegionLocations()) {<a name="line.2295"></a> -<span class="sourceLineNo">2296</span> ServerName serverName = h.getServerName();<a name="line.2296"></a> -<span class="sourceLineNo">2297</span> if (serverName == null) {<a name="line.2297"></a> -<span class="sourceLineNo">2298</span> errors.reportError("Unable to close region "<a name="line.2298"></a> -<span class="sourceLineNo">2299</span> + hi.getRegionNameAsString() + " because meta does not "<a name="line.2299"></a> -<span class="sourceLineNo">2300</span> + "have handle to reach it.");<a name="line.2300"></a> -<span class="sourceLineNo">2301</span> continue;<a name="line.2301"></a> -<span class="sourceLineNo">2302</span> }<a name="line.2302"></a> -<span class="sourceLineNo">2303</span> RegionInfo hri = h.getRegionInfo();<a name="line.2303"></a> -<span class="sourceLineNo">2304</span> if (hri == null) {<a name="line.2304"></a> -<span class="sourceLineNo">2305</span> LOG.warn("Unable to close region " + hi.getRegionNameAsString()<a name="line.2305"></a> -<span class="sourceLineNo">2306</span> + " because hbase:meta had invalid or missing "<a name="line.2306"></a> -<span class="sourceLineNo">2307</span> + HConstants.CATALOG_FAMILY_STR + ":"<a name="line.2307"></a> -<span class="sourceLineNo">2308</span> + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)<a name="line.2308"></a> -<span class="sourceLineNo">2309</span> + " qualifier value.");<a name="line.2309"></a> -<span class="sourceLineNo">2310</span> continue;<a name="line.2310"></a> -<span class="sourceLineNo">2311</span> }<a name="line.2311"></a> -<span class="sourceLineNo">2312</span> // close the region -- close files and remove assignment<a name="line.2312"></a> -<span class="sourceLineNo">2313</span> HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);<a name="line.2313"></a> -<span class="sourceLineNo">2314</span> }<a name="line.2314"></a> -<span class="sourceLineNo">2315</span> }<a name="line.2315"></a> -<span class="sourceLineNo">2316</span><a name="line.2316"></a> -<span class="sourceLineNo">2317</span> private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,<a name="line.2317"></a> -<span class="sourceLineNo">2318</span> KeeperException, InterruptedException {<a name="line.2318"></a> -<span class="sourceLineNo">2319</span> // If we are trying to fix the errors<a name="line.2319"></a> -<span class="sourceLineNo">2320</span> if (shouldFixAssignments()) {<a name="line.2320"></a> -<span class="sourceLineNo">2321</span> errors.print(msg);<a name="line.2321"></a> -<span class="sourceLineNo">2322</span> undeployRegions(hbi);<a name="line.2322"></a> -<span class="sourceLineNo">2323</span> setShouldRerun();<a name="line.2323"></a> -<span class="sourceLineNo">2324</span> RegionInfo hri = hbi.getHdfsHRI();<a name="line.2324"></a> -<span class="sourceLineNo">2325</span> if (hri == null) {<a name="line.2325"></a> -<span class="sourceLineNo">2326</span> hri = hbi.metaEntry;<a name="line.2326"></a> -<span class="sourceLineNo">2327</span> }<a name="line.2327"></a> -<span class="sourceLineNo">2328</span> HBaseFsckRepair.fixUnassigned(admin, hri);<a name="line.2328"></a> -<span class="sourceLineNo">2329</span> HBaseFsckRepair.waitUntilAssigned(admin, hri);<a name="line.2329"></a> -<span class="sourceLineNo">2330</span><a name="line.2330"></a> -<span class="sourceLineNo">2331</span> // also assign replicas if needed (do it only when this call operates on a primary replica)<a name="line.2331"></a> -<span class="sourceLineNo">2332</span> if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) return;<a name="line.2332"></a> -<span class="sourceLineNo">2333</span> int replicationCount = admin.getTableDescriptor(hri.getTable()).getRegionReplication();<a name="line.2333"></a> -<span class="sourceLineNo">2334</span> for (int i = 1; i < replicationCount; i++) {<a name="line.2334"></a> -<span class="sourceLineNo">2335</span> hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);<a name="line.2335"></a> -<span class="sourceLineNo">2336</span> HbckInfo h = regionInfoMap.get(hri.getEncodedName());<a name="line.2336"></a> -<span class="sourceLineNo">2337</span> if (h != null) {<a name="line.2337"></a> -<span class="sourceLineNo">2338</span> undeployRegions(h);<a name="line.2338"></a> -<span class="sourceLineNo">2339</span> //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore<a name="line.2339"></a> -<span class="sourceLineNo">2340</span> //in consistency checks<a name="line.2340"></a> -<span class="sourceLineNo">2341</span> h.setSkipChecks(true);<a name="line.2341"></a> -<span class="sourceLineNo">2342</span> }<a name="line.2342"></a> -<span class="sourceLineNo">2343</span> HBaseFsckRepair.fixUnassigned(admin, hri);<a name="line.2343"></a> -<span class="sourceLineNo">2344</span> HBaseFsckRepair.waitUntilAssigned(admin, hri);<a name="line.2344"></a> -<span class="sourceLineNo">2345</span> }<a name="line.2345"></a> -<span class="sourceLineNo">2346</span><a name="line.2346"></a> -<span class="sourceLineNo">2347</span> }<a name="line.2347"></a> -<span class="sourceLineNo">2348</span> }<a name="line.2348"></a> -<span class="sourceLineNo">2349</span><a name="line.2349"></a> -<span class="sourceLineNo">2350</span> /**<a name="line.2350"></a> -<span class="sourceLineNo">2351</span> * Check a single region for consistency and correct deployment.<a name="line.2351"></a> -<span class="sourceLineNo">2352</span> */<a name="line.2352"></a> -<span class="sourceLineNo">2353</span> private void checkRegionConsistency(final String key, final HbckInfo hbi)<a name="line.2353"></a> -<span class="sourceLineNo">2354</span> throws IOException, KeeperException, InterruptedException {<a name="line.2354"></a> -<span class="sourceLineNo">2355</span><a name="line.2355"></a> -<span class="sourceLineNo">2356</span> if (hbi.isSkipChecks()) return;<a name="line.2356"></a> -<span class="sourceLineNo">2357</span> String descriptiveName = hbi.toString();<a name="line.2357"></a> -<span class="sourceLineNo">2358</span> boolean inMeta = hbi.metaEntry != null;<a name="line.2358"></a> -<span class="sourceLineNo">2359</span> // In case not checking HDFS, assume the region is on HDFS<a name="line.2359"></a> -<span class="sourceLineNo">2360</span> b
<TRUNCATED>