[ https://issues.apache.org/jira/browse/HBASE-26209?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
shihuafeng updated HBASE-26209: ------------------------------- Description: {{ When I restart the HBase cluster, I find that an edit file is lost during WAL replay.}} The number of edit files renamed (00000000seqid.temp to 00000000seqid) is 31 when splitting the WAL into edits. But when I read the edits to replay, I found the total is 30. I see that the file rename is successful, but I cannot find the edit file. {panel:title=/var/log/message: I find a system exception} ACPI Error: SMBus/IPMI/GenericSerialBus write requires Buffer of length 66, found length 32 (20130517/exfield-389) Aug 16 03:30:41 esgsh6 kernel: ACPI Error: Method parse/execution failed [\_SB_.PMI0._PMM] (Node ffff8810e9eab258), AE_AML_BUFFER_LIMIT (20130517/psparse-536) {panel} # *rename (00000000seqid.temp to 00000000seqid) is 31* I cannot find the following file; I confirm the edit file (0*000000000001825010*) is not empty. {panel:title=log} hbase-cmf-hbase-REGIONSERVER-gy11.esgync.local.log.out:2021-08-16 17:56:28,956 INFO org.apache.hadoop.hbase.wal.WALSplitter: Rename hdfs://nameservice1/hbase/data/default/TRAFODION.JAVABENCH2.OE_STOCK_INDEX_300/8a42de414d97b457da88bc4682dd7c52/recovered.edits/0000000000001810650.temp to hdfs://nameservice1/hbase/data/default/TRAFODION.JAVABENCH2.OE_STOCK_INDEX_300/8a42de414d97b457da88bc4682dd7c52/recovered.edits/0*000000000001825010* {panel} ** you can see attachment{color:#999999} {color} *{color:#3366ff}r{color}*{color:#3366ff}*ename_edit.log*{color} *2. 
at replay phase , Reading the edits is 30* {panel:title=log} hbase-cmf-hbase-REGIONSERVER-gy11.esgync.local.log.out:2021-08-16 17:56:14,938 INFO org.apache.hadoop.hbase.regionserver.HRegion: after replayRecoveredEdits Maximum sequenceid 1914955 and minimum sequenceid for the region is 1916711, replay the file, path=hdfs://nameservice1/hbase/data/default/TRAFODION.JAVABENCH2.OE_STOCK_INDEX_300/8a42de414d97b457da88bc4682dd7c52/recovered.edits/0000000000001914955,seqid=1916711,*size=30* {panel} ** {code:java} org.apache.hadoop.hbase.regionserver.HRegion NavigableSet<Path> files = WALSplitter.getSplitEditFilesSorted(fs, regiondir); if (LOG.isDebugEnabled()) { LOG.debug("Found " + (files == null ? 0 : files.size()) + " recovered edits file(s) under " + regiondir); } if (files == null || files.isEmpty()) return seqid; long start=System.currentTimeMillis(); for (Path edits: files) { if (edits == null || !fs.exists(edits)) { LOG.warn("Null or non-existent edits file: " + edits); continue; } if (isZeroLengthThenDelete(fs, edits)) continue; long maxSeqId; String fileName = edits.getName(); maxSeqId = Math.abs(Long.parseLong(fileName)); if (maxSeqId <= minSeqIdForTheRegion) { if (LOG.isDebugEnabled()) { String msg = "Maximum sequenceid for this wal is " + maxSeqId + " and minimum sequenceid for the region is " + minSeqIdForTheRegion + ", skipped the whole file, path=" + edits; LOG.info(msg); } continue; } try { seqid = Math.max(seqid, replayRecoveredEdits(edits, maxSeqIdInStores, reporter)); // replay the edits. 
Replay can return -1 if everything is skipped, only update // if seqId is greater String msg = "after replayRecoveredEdits Maximum sequenceid " + maxSeqId + " and minimum sequenceid for the region is " + minSeqIdForTheRegion + ", replay the file, path=" + edits +",seqid="+seqid+",size="+files.size(); LOG.info(msg);{code} was: {{ When I restart the HBase cluster, I find that an edit file is lost during WAL replay.}} The number of edit files renamed (00000000seqid.temp to 00000000seqid) is 31 when splitting the WAL into edits. But when I read the edits to replay, I found the total is 30. I see that the file rename is successful, but I cannot find the edit file. {panel:title=/var/log/message: I find a system exception} ACPI Error: SMBus/IPMI/GenericSerialBus write requires Buffer of length 66, found length 32 (20130517/exfield-389) Aug 16 03:30:41 esgsh6 kernel: ACPI Error: Method parse/execution failed [\_SB_.PMI0._PMM] (Node ffff8810e9eab258), AE_AML_BUFFER_LIMIT (20130517/psparse-536) {panel} # *rename (00000000seqid.temp to 00000000seqid) is 31* I can find the following file; I confirm the edit file (0*000000000001825010*) is not empty. {panel:title=log} hbase-cmf-hbase-REGIONSERVER-gy11.esgync.local.log.out:2021-08-16 17:56:28,956 INFO org.apache.hadoop.hbase.wal.WALSplitter: Rename hdfs://nameservice1/hbase/data/default/TRAFODION.JAVABENCH2.OE_STOCK_INDEX_300/8a42de414d97b457da88bc4682dd7c52/recovered.edits/0000000000001810650.temp to hdfs://nameservice1/hbase/data/default/TRAFODION.JAVABENCH2.OE_STOCK_INDEX_300/8a42de414d97b457da88bc4682dd7c52/recovered.edits/0*000000000001825010* {panel} ** you can see attachment{color:#999999} {color} *{color:#3366ff}r{color}*{color:#3366ff}*ename_edit.log*{color} *2. 
at replay phase , Reading the edits is 30* {panel:title=log} hbase-cmf-hbase-REGIONSERVER-gy11.esgync.local.log.out:2021-08-16 17:56:14,938 INFO org.apache.hadoop.hbase.regionserver.HRegion: after replayRecoveredEdits Maximum sequenceid 1914955 and minimum sequenceid for the region is 1916711, replay the file, path=hdfs://nameservice1/hbase/data/default/TRAFODION.JAVABENCH2.OE_STOCK_INDEX_300/8a42de414d97b457da88bc4682dd7c52/recovered.edits/0000000000001914955,seqid=1916711,*size=30* {panel} ** {code:java} org.apache.hadoop.hbase.regionserver.HRegion NavigableSet<Path> files = WALSplitter.getSplitEditFilesSorted(fs, regiondir); if (LOG.isDebugEnabled()) { LOG.debug("Found " + (files == null ? 0 : files.size()) + " recovered edits file(s) under " + regiondir); } if (files == null || files.isEmpty()) return seqid; long start=System.currentTimeMillis(); for (Path edits: files) { if (edits == null || !fs.exists(edits)) { LOG.warn("Null or non-existent edits file: " + edits); continue; } if (isZeroLengthThenDelete(fs, edits)) continue; long maxSeqId; String fileName = edits.getName(); maxSeqId = Math.abs(Long.parseLong(fileName)); if (maxSeqId <= minSeqIdForTheRegion) { if (LOG.isDebugEnabled()) { String msg = "Maximum sequenceid for this wal is " + maxSeqId + " and minimum sequenceid for the region is " + minSeqIdForTheRegion + ", skipped the whole file, path=" + edits; LOG.info(msg); } continue; } try { seqid = Math.max(seqid, replayRecoveredEdits(edits, maxSeqIdInStores, reporter)); // replay the edits. 
Replay can return -1 if everything is skipped, only update // if seqId is greater String msg = "after replayRecoveredEdits Maximum sequenceid " + maxSeqId + " and minimum sequenceid for the region is " + minSeqIdForTheRegion + ", replay the file, path=" + edits +",seqid="+seqid+",size="+files.size(); LOG.info(msg);{code} > edit file loss result in data loss when restart hbase cluster > --------------------------------------------------------------- > > Key: HBASE-26209 > URL: https://issues.apache.org/jira/browse/HBASE-26209 > Project: HBase > Issue Type: Bug > Components: regionserver > Environment: Linux version 3.10.0-693.el7.x86_64 > (mockbu...@x86-038.build.eng.bos.redhat.com) (gcc version 4.8.5 20150623 (Red > Hat 4.8.5-16) (GCC) ) #1 SMP Thu Jul 6 19:56:57 EDT 2017 > Reporter: shihuafeng > Priority: Blocker > Attachments: Repaly_edit.log, rename_edit.log > > > {{ When I restart the HBase cluster, I find that an edit file is lost during WAL > replay.}} > The number of edit files renamed (00000000seqid.temp to 00000000seqid) is > 31 when splitting the WAL into edits. But when I read the edits to replay, I found the total > is 30. > I see that the file rename is successful, but I cannot find the edit file. > {panel:title=/var/log/message: I find a system exception} > ACPI Error: SMBus/IPMI/GenericSerialBus write requires Buffer of length 66, > found length 32 (20130517/exfield-389) > Aug 16 03:30:41 esgsh6 kernel: ACPI Error: Method parse/execution failed > [\_SB_.PMI0._PMM] (Node ffff8810e9eab258), AE_AML_BUFFER_LIMIT > (20130517/psparse-536) > {panel} > # *rename (00000000seqid.temp to 00000000seqid) is 31* > > I cannot find the following file; I confirm the edit file > (0*000000000001825010*) is not empty. 
> > {panel:title=log} > hbase-cmf-hbase-REGIONSERVER-gy11.esgync.local.log.out:2021-08-16 > 17:56:28,956 INFO org.apache.hadoop.hbase.wal.WALSplitter: Rename > hdfs://nameservice1/hbase/data/default/TRAFODION.JAVABENCH2.OE_STOCK_INDEX_300/8a42de414d97b457da88bc4682dd7c52/recovered.edits/0000000000001810650.temp > to > hdfs://nameservice1/hbase/data/default/TRAFODION.JAVABENCH2.OE_STOCK_INDEX_300/8a42de414d97b457da88bc4682dd7c52/recovered.edits/0*000000000001825010* > {panel} > ** > you can see attachment{color:#999999} {color} > *{color:#3366ff}r{color}*{color:#3366ff}*ename_edit.log*{color} > *2. at replay phase , Reading the edits is 30* > > {panel:title=log} > hbase-cmf-hbase-REGIONSERVER-gy11.esgync.local.log.out:2021-08-16 > 17:56:14,938 INFO org.apache.hadoop.hbase.regionserver.HRegion: after > replayRecoveredEdits Maximum sequenceid 1914955 and minimum sequenceid for > the region is 1916711, replay the file, > path=hdfs://nameservice1/hbase/data/default/TRAFODION.JAVABENCH2.OE_STOCK_INDEX_300/8a42de414d97b457da88bc4682dd7c52/recovered.edits/0000000000001914955,seqid=1916711,*size=30* > {panel} > ** > > > {code:java} > org.apache.hadoop.hbase.regionserver.HRegion > NavigableSet<Path> files = WALSplitter.getSplitEditFilesSorted(fs, > regiondir); > if (LOG.isDebugEnabled()) { > LOG.debug("Found " + (files == null ? 
0 : files.size()) > + " recovered edits file(s) under " + regiondir); > } > if (files == null || files.isEmpty()) return seqid; > long start=System.currentTimeMillis(); > for (Path edits: files) { > if (edits == null || !fs.exists(edits)) { > LOG.warn("Null or non-existent edits file: " + edits); > continue; > } > if (isZeroLengthThenDelete(fs, edits)) continue; > long maxSeqId; > String fileName = edits.getName(); > maxSeqId = Math.abs(Long.parseLong(fileName)); > if (maxSeqId <= minSeqIdForTheRegion) { > if (LOG.isDebugEnabled()) { > String msg = "Maximum sequenceid for this wal is " + maxSeqId > + " and minimum sequenceid for the region is " + minSeqIdForTheRegion > + ", skipped the whole file, path=" + edits; > LOG.info(msg); > } > continue; > } > try { > seqid = Math.max(seqid, replayRecoveredEdits(edits, maxSeqIdInStores, > reporter)); > // replay the edits. Replay can return -1 if everything is skipped, only > update > // if seqId is greater > String msg = "after replayRecoveredEdits Maximum sequenceid " + maxSeqId > + " and minimum sequenceid for the region is " + minSeqIdForTheRegion > + ", replay the file, path=" + edits > +",seqid="+seqid+",size="+files.size(); > LOG.info(msg);{code} > -- This message was sent by Atlassian Jira (v8.3.4#803005)