[ https://issues.apache.org/jira/browse/HBASE-27552?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
chaijunjie resolved HBASE-27552. -------------------------------- Resolution: Not A Problem The problem can no longer be reproduced, so this issue is being closed. > HMaster can not finish Initialization when hbase:namespace is in > ABNORMALLY_CLOSED state and the proc corrupt > ------------------------------------------------------------------------------------------------------------- > > Key: HBASE-27552 > URL: https://issues.apache.org/jira/browse/HBASE-27552 > Project: HBase > Issue Type: Bug > Components: proc-v2 > Affects Versions: 2.4.14 > Reporter: chaijunjie > Priority: Major > Labels: core > > 2023-01-05 19:56:41,385 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=102, ppid=97, > state=SUCCESS; OpenRegionProcedure 1903713b7f970a75db1e7a0e72da21d7, > server=node-master2mesq,21302,1672817611868 | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > 2023-01-05 19:56:41,385 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=103, ppid=96, > state=SUCCESS; OpenRegionProcedure 6695b9c5ad80249bc43830ddc5259487, > server=node-master2mesq,21302,1672817611868 | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > 2023-01-05 19:56:41,402 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=106, ppid=82, > state=WAITING:REGION_STATE_TRANSITION_CONFIRM_OPENED; > TransitRegionStateProcedure table=ImportTable1, > region=050bcf6e15ddd079d750992bbfb53163, ASSIGN | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > 2023-01-05 19:56:41,403 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=107, ppid=82, > state=WAITING:REGION_STATE_TRANSITION_CONFIRM_OPENED; > TransitRegionStateProcedure table=hbase:hindex, > region=6789443c0a98d2b34f891ae60878aac3, ASSIGN | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > 
2023-01-05 19:56:41,404 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=108, ppid=82, > state=WAITING:REGION_STATE_TRANSITION_CONFIRM_OPENED; > TransitRegionStateProcedure table=hbase:acl, > region=96a2ec5ea797e6847188c965f8c78ce1, ASSIGN | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > 2023-01-05 19:56:41,404 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=109, ppid=82, > state=WAITING:REGION_STATE_TRANSITION_CONFIRM_OPENED; > TransitRegionStateProcedure table=ImportTable1, > region=24e0cb0a958d242976a790ff435d24b5, ASSIGN | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > 2023-01-05 19:56:41,405 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=110, ppid=82, > state=RUNNABLE:REGION_STATE_TRANSITION_OPEN; TransitRegionStateProcedure > table=ImportTable1, region=a2e7b85420a3cf98fc731ad93f7129a2, ASSIGN | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > *2023-01-05 19:56:41,405 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=111, ppid=82, > state=RUNNABLE:REGION_STATE_TRANSITION_OPEN; TransitRegionStateProcedure > table=hbase:namespace, region=9be1542260fa8af4a712ddda322b7b6f, ASSIGN | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343)* > 2023-01-05 19:56:41,406 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=112, ppid=82, > state=WAITING:REGION_STATE_TRANSITION_CONFIRM_OPENED; > TransitRegionStateProcedure table=hbase:rsgroup, > region=eaf1531c6cc0738027def0b4d4615b5f, ASSIGN | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > 2023-01-05 19:56:41,406 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=125, ppid=95, > state=SUCCESS; 
OpenRegionProcedure 85301e5c14a8c3e5ba31822d7db0a6fc, > server=node-master3mpye,21302,1672817640502 | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > 2023-01-05 19:56:41,407 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=126, ppid=96, > state=SUCCESS; OpenRegionProcedure 6695b9c5ad80249bc43830ddc5259487, > server=node-master3mpye,21302,1672817640502 | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > 2023-01-05 19:56:41,408 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=127, ppid=94, > state=SUCCESS; OpenRegionProcedure 448b88d503d4e31c47b80ac10d8ef6a4, > server=node-master3mpye,21302,1672817640502 | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > > 2023-01-05 19:57:26,250 | INFO | PEWorker-13 | > hdfs://hacluster/hbase/WALs/node-master2mesq,21302,1672919282543-splitting > dir is empty, no logs to split. | > org.apache.hadoop.hbase.master.SplitLogManager.getFileList(SplitLogManager.java:171) > 2023-01-05 19:57:26,250 | INFO | PEWorker-13 | > node-master2mesq,21302,1672919282543 WAL count=0, meta=false | > org.apache.hadoop.hbase.master.SplitWALManager.getWALsToSplit(SplitWALManager.java:106) > *2023-01-05 19:57:27,068 | WARN | > master/node-master3MPYe:21300:becomeActiveMaster | > hbase:namespace,,1672387265579.9be1542260fa8af4a712ddda322b7b6f. is NOT > online; state=\{9be1542260fa8af4a712ddda322b7b6f state=ABNORMALLY_CLOSED, > ts=1672919843989, server=node-master1ficj,21302,1672820444411}; > ServerCrashProcedures=true. Master startup cannot progress, in > holding-pattern until region onlined. 
| > org.apache.hadoop.hbase.master.HMaster.isRegionOnline(HMaster.java:1264)* > 2023-01-05 19:57:27,227 | INFO | PEWorker-13 | Initialized subprocedures=[ > {pid=606, ppid=603, > state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; > TransitRegionStateProcedure table=ImportTable1, > region=8d29da5ef730c7a003cafb0be8981674, ASSIGN} > , > {pid=607, ppid=603, > state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; > TransitRegionStateProcedure table=hbase:hindex, > region=6789443c0a98d2b34f891ae60878aac3, ASSIGN} > , > {pid=608, ppid=603, > state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; > TransitRegionStateProcedure table=ImportTable1, > region=3117c3b77e4cf7a9f7ae9fa1aec87f08, ASSIGN} > , > {pid=609, ppid=603, > state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; > TransitRegionStateProcedure table=hbase:acl, > region=96a2ec5ea797e6847188c965f8c78ce1, ASSIGN} > , > {pid=610, ppid=603, > state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; > TransitRegionStateProcedure table=ImportTable1, > region=9051d9e17c5b811c09471616044ed8be, ASSIGN} > ] | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor.execProcedure(ProcedureExecutor.java:1683) > 2023-01-05 19:57:27,235 | INFO | PEWorker-14 | Took xlock for pid=606, > ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; > TransitRegionStateProcedure table=ImportTable1, > region=8d29da5ef730c7a003cafb0be8981674, ASSIGN | > org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.waitRegions(MasterProcedureScheduler.java:727) > 2023-01-05 19:57:27,236 | INFO | PEWorker-4 | Took xlock for pid=607, > ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; > TransitRegionStateProcedure table=hbase:hindex, > region=6789443c0a98d2b34f891ae60878aac3, ASSIGN | > org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.waitRegions(MasterProcedureScheduler.java:727) > 2023-01-05 19:57:27,242 | INFO | PEWorker-16 | Took xlock for pid=609, 
> ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; > TransitRegionStateProcedure table=hbase:acl, > region=96a2ec5ea797e6847188c965f8c78ce1, ASSIGN | > org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.waitRegions(MasterProcedureScheduler.java:727) > 2023-01-05 19:57:27,245 | INFO | PEWorker-13 | Took xlock for pid=610, > ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; > TransitRegionStateProcedure table=ImportTable1, > region=9051d9e17c5b811c09471616044ed8be, ASSIGN | > org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.waitRegions(MasterProcedureScheduler.java:727) > 2023-01-05 19:57:27,249 | INFO | PEWorker-15 | Took xlock for pid=608, > ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; > TransitRegionStateProcedure table=ImportTable1, > region=3117c3b77e4cf7a9f7ae9fa1aec87f08, ASSIGN | > org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.waitRegions(MasterProcedureScheduler.java:727) > 2023-01-05 19:57:27,250 | INFO | PEWorker-14 | Setting lastHost as the > region location node-master2mesq,21302,1672919282543 | > org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:197) > 2023-01-05 19:57:27,252 | INFO | PEWorker-14 | Starting pid=606, ppid=603, > state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, locked=true; > TransitRegionStateProcedure table=ImportTable1, > region=8d29da5ef730c7a003cafb0be8981674, ASSIGN; state=OPEN, > location=node-master2mesq,21302,1672919282543; forceNewPlan=false, > retain=true | > org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:201) > 2023-01-05 19:57:27,254 | INFO | PEWorker-4 | Setting lastHost as the region > location node-master2mesq,21302,1672919282543 | > org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:197) > 
2023-01-05 19:57:27,257 | INFO | PEWorker-4 | Starting pid=607, ppid=603, > state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, locked=true; > TransitRegionStateProcedure table=hbase:hindex, > region=6789443c0a98d2b34f891ae60878aac3, ASSIGN; state=OPEN, > location=node-master2mesq,21302,1672919282543; forceNewPlan=false, > retain=true | > org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:201) > 2023-01-05 19:57:27,266 | INFO | PEWorker-16 | Setting lastHost as the > region location node-master2mesq,21302,1672919282543 | > org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:197) > 2023-01-05 19:57:27,266 | INFO | PEWorker-13 | Setting lastHost as the > region location node-master2mesq,21302,1672919282543 | > org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:197) > 2023-01-05 19:57:27,267 | INFO | PEWorker-16 | Starting pid=609, ppid=603, > state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, locked=true; > TransitRegionStateProcedure table=hbase:acl, > region=96a2ec5ea797e6847188c965f8c78ce1, ASSIGN; state=OPEN, > location=node-master2mesq,21302,1672919282543; forceNewPlan=false, > retain=true | > org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:201) > 2023-01-05 19:57:27,267 | INFO | PEWorker-13 | Starting pid=610, ppid=603, > state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, locked=true; > TransitRegionStateProcedure table=ImportTable1, > region=9051d9e17c5b811c09471616044ed8be, ASSIGN; state=OPEN, > location=node-master2mesq,21302,1672919282543; forceNewPlan=false, > retain=true | > org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:201) > 2023-01-05 19:57:27,271 | INFO | PEWorker-15 | Setting lastHost as the > region 
location node-master2mesq,21302,1672919282543 | > org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:197) > 2023-01-05 19:57:27,271 | INFO | PEWorker-15 | Starting pid=608, ppid=603, > state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, locked=true; > TransitRegionStateProcedure table=ImportTable1, > region=3117c3b77e4cf7a9f7ae9fa1aec87f08, ASSIGN; state=OPEN, > location=node-master2mesq,21302,1672919282543; forceNewPlan=false, > retain=true | > org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:201) > 2023-01-05 19:57:27,280 | INFO | PEWorker-19 | pid=606 updating hbase:meta > row=8d29da5ef730c7a003cafb0be8981674, regionState=OPENING, > regionLocation=node-master1ficj,21302,1672919746624 | > org.apache.hadoop.hbase.master.assignment.RegionStateStore.updateUserRegionLocation(RegionStateStore.java:219) > > *The hbase:namespace region is in ABNORMALLY_CLOSED state and its assign > procedure is corrupt, so hbase:namespace is never assigned and HMaster > initialization cannot finish.* > > *I think we should either process the regions in ABNORMALLY_CLOSED state in > org.apache.hadoop.hbase.master.assignment.AssignmentManager#processOfflineRegions, > or check the procedure in > org.apache.hadoop.hbase.master.HMaster#waitForNamespaceOnline.* -- This message was sent by Atlassian Jira (v8.20.10#820010)