[ https://issues.apache.org/jira/browse/SOLR-4531?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Shalin Shekhar Mangar resolved SOLR-4531. ----------------------------------------- Resolution: Fixed Assignee: Shalin Shekhar Mangar Fix Version/s: master (7.0) 6.3 Thanks Simon and Dat! > Add tests to ensure that recovery does not fail on corrupted tlogs > ------------------------------------------------------------------ > > Key: SOLR-4531 > URL: https://issues.apache.org/jira/browse/SOLR-4531 > Project: Solr > Issue Type: Bug > Components: SolrCloud > Affects Versions: 4.0 > Reporter: Simon Scofield > Assignee: Shalin Shekhar Mangar > Fix For: 6.3, master (7.0) > > Attachments: SOLR-4531.patch, SOLR-4531.patch > > > One of the solr nodes in our SolrCloud was killed. This caused the tlog to become > corrupted. Now the node can't finish recovering. There is an exception: > Caused by: java.lang.IndexOutOfBoundsException: Index: 14, Size: 13 > at java.util.ArrayList.RangeCheck(ArrayList.java:547) > at java.util.ArrayList.get(ArrayList.java:322) > at > org.apache.solr.update.TransactionLog$LogCodec.readExternString(TransactionLog.java:128) > at > org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:188) > at > org.apache.solr.common.util.JavaBinCodec.readOrderedMap(JavaBinCodec.java:120) > at > org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:184) > at > org.apache.solr.common.util.JavaBinCodec.readArray(JavaBinCodec.java:451) > at > org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:182) > at > org.apache.solr.common.util.JavaBinCodec.readOrderedMap(JavaBinCodec.java:121) > at > org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:184) > at > org.apache.solr.common.util.JavaBinCodec.readArray(JavaBinCodec.java:451) > at > org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:182) > at > org.apache.solr.common.util.JavaBinCodec.readArray(JavaBinCodec.java:451) > at > org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:182) > at > 
org.apache.solr.update.TransactionLog$ReverseReader.next(TransactionLog.java:708) > at > org.apache.solr.update.UpdateLog$RecentUpdates.update(UpdateLog.java:906) > at > org.apache.solr.update.UpdateLog$RecentUpdates.access$000(UpdateLog.java:846) > at org.apache.solr.update.UpdateLog.getRecentUpdates(UpdateLog.java:996) > at org.apache.solr.update.UpdateLog.init(UpdateLog.java:241) > at org.apache.solr.update.UpdateHandler.initLog(UpdateHandler.java:94) > at org.apache.solr.update.UpdateHandler.<init>(UpdateHandler.java:123) > at > org.apache.solr.update.DirectUpdateHandler2.<init>(DirectUpdateHandler2.java:97) > ... 31 more > I checked the code in UpdateLog.java and found that only IOException is caught > when the above exception happens. > {code:title=solr\\core\\src\\java\\org\\apache\\solr\\update\\UpdateLog.java|borderStyle=solid} > private void update() { > int numUpdates = 0; > updateList = new ArrayList<List<Update>>(logList.size()); > deleteByQueryList = new ArrayList<Update>(); > deleteList = new ArrayList<DeleteUpdate>(); > updates = new HashMap<Long,Update>(numRecordsToKeep); > for (TransactionLog oldLog : logList) { > List<Update> updatesForLog = new ArrayList<Update>(); > TransactionLog.ReverseReader reader = null; > try { > reader = oldLog.getReverseReader(); > while (numUpdates < numRecordsToKeep) { > Object o = reader.next(); > if (o==null) break; > try { > // should currently be a List<Oper,Ver,Doc/Id> > List entry = (List)o; > // TODO: refactor this out so we get common error handling > int opAndFlags = (Integer)entry.get(0); > if (latestOperation == 0) { > latestOperation = opAndFlags; > } > int oper = opAndFlags & UpdateLog.OPERATION_MASK; > long version = (Long) entry.get(1); > switch (oper) { > case UpdateLog.ADD: > case UpdateLog.DELETE: > case UpdateLog.DELETE_BY_QUERY: > Update update = new Update(); > update.log = oldLog; > update.pointer = reader.position(); > update.version = version; > updatesForLog.add(update); > updates.put(version, 
update); > > if (oper == UpdateLog.DELETE_BY_QUERY) { > deleteByQueryList.add(update); > } else if (oper == UpdateLog.DELETE) { > deleteList.add(new DeleteUpdate(version, > (byte[])entry.get(2))); > } > > break; > case UpdateLog.COMMIT: > break; > default: > throw new > SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown Operation! " + > oper); > } > } catch (ClassCastException cl) { > log.warn("Unexpected log entry or corrupt log. Entry=" + o, > cl); > // would be caused by a corrupt transaction log > } catch (Exception ex) { > log.warn("Exception reverse reading log", ex); > break; > } > } > } catch (IOException e) { > // failure to read a log record isn't fatal > log.error("Exception reading versions from log",e); > } finally { > if (reader != null) reader.close(); > } > updateList.add(updatesForLog); > } > } > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org