[ https://issues.apache.org/jira/browse/CASSANDRA-3450?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13143547#comment-13143547 ]
Lanny Ripple commented on CASSANDRA-3450: ----------------------------------------- Don't have the time to work this up formally but here's a suggested fix. {noformat} diff --git a/src/java/org/apache/cassandra/hadoop/ColumnFamilyRecordReader.java b/src/java/org/apache/cassandra/hadoop/ColumnFamilyRecordReader.java index 7b4b0d6..13e1fee 100644 --- a/src/java/org/apache/cassandra/hadoop/ColumnFamilyRecordReader.java +++ b/src/java/org/apache/cassandra/hadoop/ColumnFamilyRecordReader.java @@ -234,51 +234,57 @@ public class ColumnFamilyRecordReader extends RecordReader<ByteBuffer, SortedMap if (startToken == null) { startToken = split.getStartToken(); - } - else if (startToken.equals(split.getEndToken())) - { - rows = null; - return; } - - KeyRange keyRange = new KeyRange(batchRowCount) - .setStart_token(startToken) - .setEnd_token(split.getEndToken()); - try + + // The removal of empty CF rows could result in an empty List<KeySlice> rows. + // Keep trying until we return on reaching the end of the range or rows is nonEmpty. + while (rows == null || rows.isEmpty()) { - rows = client.get_range_slices(new ColumnParent(cfName), - predicate, - keyRange, - consistencyLevel); - - // nothing new? reached the end - if (rows.isEmpty()) + if (startToken.equals(split.getEndToken())) { rows = null; return; } - // Pre-compute the last row key, before removing empty rows - ByteBuffer lastRowKey = rows.get(rows.size() - 1).key; + KeyRange keyRange = new KeyRange(batchRowCount) + .setStart_token(startToken) + .setEnd_token(split.getEndToken()); + try + { + rows = client.get_range_slices(new ColumnParent(cfName), + predicate, + keyRange, + consistencyLevel); + + // nothing new?
reached the end + if (rows.isEmpty()) + { + rows = null; + return; + } + + // Pre-compute the last row key, before removing empty rows + ByteBuffer lastRowKey = rows.get(rows.size() - 1).key; + + // only remove empty rows if the slice predicate is empty + if (isPredicateEmpty(predicate)) + { + Iterator<KeySlice> rowsIterator = rows.iterator(); + while (rowsIterator.hasNext()) + if (rowsIterator.next().columns.isEmpty()) + rowsIterator.remove(); + } - // only remove empty rows if the slice predicate is empty - if (isPredicateEmpty(predicate)) + // reset to iterate through the new batch + i = 0; + + // prepare for the next slice to be read + startToken = partitioner.getTokenFactory().toString(partitioner.getToken(lastRowKey)); + } + catch (Exception e) { - Iterator<KeySlice> rowsIterator = rows.iterator(); - while (rowsIterator.hasNext()) - if (rowsIterator.next().columns.isEmpty()) - rowsIterator.remove(); + throw new RuntimeException(e); } - - // reset to iterate through the new batch - i = 0; - - // prepare for the next slice to be read - startToken = partitioner.getTokenFactory().toString(partitioner.getToken(lastRowKey)); - } - catch (Exception e) - { - throw new RuntimeException(e); } } {noformat} > maybeInit in ColumnFamilyRecordReader can cause rows to be empty but not null > ----------------------------------------------------------------------------- > > Key: CASSANDRA-3450 > URL: https://issues.apache.org/jira/browse/CASSANDRA-3450 > Project: Cassandra > Issue Type: Bug > Components: Hadoop > Affects Versions: 1.0.1 > Reporter: Lanny Ripple > Priority: Minor > > In {{ColumnFamilyRecordReader}} {{computeNext()}} calls {{maybeInit()}} and > then if {{rows}} is not null it is indexed into. {{maybeInit()}} could fetch > new data, determine the associated slice predicate is empty, and end up > removing all the rows if all columns turned out to be empty. There is no > check for {{rows.isEmpty()}} after the possible removal of all rows. 
-- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa For more information on JIRA, see: http://www.atlassian.com/software/jira