Author: ramkrishna Date: Tue Oct 18 17:52:06 2011 New Revision: 1185771 URL: http://svn.apache.org/viewvc?rev=1185771&view=rev Log: HBASE-4585 Avoid seek operation when current kv is deleted (Liyin Tang)
Modified: hbase/trunk/CHANGES.txt hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/DeleteTracker.java hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/ScanDeleteTracker.java hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestBlocksRead.java hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanDeleteTracker.java Modified: hbase/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=1185771&r1=1185770&r2=1185771&view=diff ============================================================================== --- hbase/trunk/CHANGES.txt (original) +++ hbase/trunk/CHANGES.txt Tue Oct 18 17:52:06 2011 @@ -625,6 +625,7 @@ Release 0.92.0 - Unreleased HBASE-4568 Make zk dump jsp response faster HBASE-4606 Remove spam in HCM and fix a list.size == 0 HBASE-3581 hbase rpc should send size of response + HBASE-4585 Avoid seek operation when current kv is deleted(Liyin Tang) TASKS Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/DeleteTracker.java URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/DeleteTracker.java?rev=1185771&r1=1185770&r2=1185771&view=diff ============================================================================== --- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/DeleteTracker.java (original) +++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/DeleteTracker.java Tue Oct 18 17:52:06 2011 @@ -51,9 +51,9 @@ public interface DeleteTracker { * @param qualifierOffset column qualifier offset * @param qualifierLength column qualifier length * @param timestamp timestamp - * @return true is the specified KeyValue is deleted, false if not + * @return deleteResult The result tells whether the KeyValue is deleted and why */ - public boolean isDeleted(byte [] buffer, int qualifierOffset, + public DeleteResult 
isDeleted(byte [] buffer, int qualifierOffset, int qualifierLength, long timestamp); /** @@ -94,4 +94,17 @@ public interface DeleteTracker { NEXT_NEW } + /** + * Returns codes for delete result. + * The codes tell the ScanQueryMatcher whether the kv is deleted and why. + * Based on the delete result, the ScanQueryMatcher will decide the next + * operation + */ + public static enum DeleteResult { + FAMILY_DELETED, // The KeyValue is deleted by a delete family. + COLUMN_DELETED, // The KeyValue is deleted by a delete column. + VERSION_DELETED, // The KeyValue is deleted by a version delete. + NOT_DELETED + } + } Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/ScanDeleteTracker.java URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/ScanDeleteTracker.java?rev=1185771&r1=1185770&r2=1185771&view=diff ============================================================================== --- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/ScanDeleteTracker.java (original) +++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/ScanDeleteTracker.java Tue Oct 18 17:52:06 2011 @@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.regionserver; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.regionserver.DeleteTracker.DeleteResult; import org.apache.hadoop.hbase.util.Bytes; /** @@ -99,13 +100,13 @@ public class ScanDeleteTracker implement * @param qualifierOffset column qualifier offset * @param qualifierLength column qualifier length * @param timestamp timestamp - * @return true is the specified KeyValue is deleted, false if not + * @return deleteResult */ @Override - public boolean isDeleted(byte [] buffer, int qualifierOffset, + public DeleteResult isDeleted(byte [] buffer, int qualifierOffset, int qualifierLength, long timestamp) { if (timestamp <= familyStamp) { - return true; + return DeleteResult.FAMILY_DELETED; } if (deleteBuffer != null) { @@ -114,12 +115,12 
@@ public class ScanDeleteTracker implement if (ret == 0) { if (deleteType == KeyValue.Type.DeleteColumn.getCode()) { - return true; + return DeleteResult.COLUMN_DELETED; } // Delete (aka DeleteVersion) // If the timestamp is the same, keep this one if (timestamp == deleteTimestamp) { - return true; + return DeleteResult.VERSION_DELETED; } // use assert or not? assert timestamp < deleteTimestamp; @@ -138,7 +139,7 @@ public class ScanDeleteTracker implement } } - return false; + return DeleteResult.NOT_DELETED; } @Override @@ -158,4 +159,4 @@ public class ScanDeleteTracker implement public void update() { this.reset(); } -} \ No newline at end of file +} Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java?rev=1185771&r1=1185770&r2=1185771&view=diff ============================================================================== --- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java (original) +++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java Tue Oct 18 17:52:06 2011 @@ -26,6 +26,7 @@ import org.apache.hadoop.hbase.client.Sc import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.Filter.ReturnCode; import org.apache.hadoop.hbase.io.TimeRange; +import org.apache.hadoop.hbase.regionserver.DeleteTracker.DeleteResult; import org.apache.hadoop.hbase.util.Bytes; import java.io.IOException; @@ -184,15 +185,20 @@ public class ScanQueryMatcher { } } - if (!this.deletes.isEmpty() && - deletes.isDeleted(bytes, offset, qualLength, timestamp)) { - - // May be able to optimize the SKIP here, if we matched - // due to a DelFam, we can skip to next row - // due to a DelCol, we can skip to next col - // But it requires more info out of isDelete(). - // needful -> million column challenge. 
- return MatchCode.SKIP; + if (!this.deletes.isEmpty()) { + DeleteResult deleteResult = deletes.isDeleted(bytes, offset, qualLength, + timestamp); + switch (deleteResult) { + case FAMILY_DELETED: + case COLUMN_DELETED: + return columns.getNextRowOrNextColumn(bytes, offset, qualLength); + case VERSION_DELETED: + return MatchCode.SKIP; + case NOT_DELETED: + break; + default: + throw new RuntimeException("UNEXPECTED"); + } } int timestampComparison = tr.compare(timestamp); Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestBlocksRead.java URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestBlocksRead.java?rev=1185771&r1=1185770&r2=1185771&view=diff ============================================================================== --- hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestBlocksRead.java (original) +++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestBlocksRead.java Tue Oct 18 17:52:06 2011 @@ -283,14 +283,14 @@ public class TestBlocksRead extends HBas deleteFamily(FAMILY, "row", 6); region.flushcache(); - // Baseline expected blocks read: 6. - kvs = getData(FAMILY, "row", "col1", 6); + // Baseline expected blocks read: 4. 
[HBASE-4585] + kvs = getData(FAMILY, "row", "col1", 4); assertEquals(0, kvs.length); - kvs = getData(FAMILY, "row", "col2", 6); + kvs = getData(FAMILY, "row", "col2", 5); assertEquals(0, kvs.length); - kvs = getData(FAMILY, "row", "col3", 6); + kvs = getData(FAMILY, "row", "col3", 4); assertEquals(0, kvs.length); - kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 6); + kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 5); assertEquals(0, kvs.length); // File 5: Delete @@ -304,8 +304,8 @@ public class TestBlocksRead extends HBas putData(FAMILY, "row", "col3", 9); region.flushcache(); - // Baseline expected blocks read: 10 - kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 10); + // Baseline expected blocks read: 8. [HBASE-4585] + kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 8); assertEquals(0, kvs.length); // File 7: Put back new data Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanDeleteTracker.java URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanDeleteTracker.java?rev=1185771&r1=1185770&r2=1185771&view=diff ============================================================================== --- hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanDeleteTracker.java (original) +++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanDeleteTracker.java Tue Oct 18 17:52:06 2011 @@ -23,6 +23,7 @@ package org.apache.hadoop.hbase.regionse import org.apache.hadoop.hbase.HBaseTestCase; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.regionserver.DeleteTracker.DeleteResult; import org.apache.hadoop.hbase.util.Bytes; @@ -42,8 +43,8 @@ public class TestScanDeleteTracker exten deleteType = KeyValue.Type.Delete.getCode(); sdt.add(qualifier, 0, qualifier.length, timestamp, deleteType); - boolean ret = 
sdt.isDeleted(qualifier, 0, qualifier.length, timestamp); - assertEquals(true, ret); + DeleteResult ret = sdt.isDeleted(qualifier, 0, qualifier.length, timestamp); + assertEquals(DeleteResult.VERSION_DELETED, ret); } public void testDeletedBy_DeleteColumn() { @@ -52,8 +53,8 @@ public class TestScanDeleteTracker exten sdt.add(qualifier, 0, qualifier.length, timestamp, deleteType); timestamp -= 5; - boolean ret = sdt.isDeleted(qualifier, 0, qualifier.length, timestamp); - assertEquals(true, ret); + DeleteResult ret = sdt.isDeleted(qualifier, 0, qualifier.length, timestamp); + assertEquals(DeleteResult.COLUMN_DELETED, ret); } public void testDeletedBy_DeleteFamily() { @@ -63,8 +64,8 @@ public class TestScanDeleteTracker exten sdt.add(qualifier, 0, qualifier.length, timestamp, deleteType); timestamp -= 5; - boolean ret = sdt.isDeleted(qualifier, 0, qualifier.length, timestamp); - assertEquals(true, ret); + DeleteResult ret = sdt.isDeleted(qualifier, 0, qualifier.length, timestamp); + assertEquals(DeleteResult.FAMILY_DELETED, ret); } public void testDelete_DeleteColumn() { @@ -78,8 +79,8 @@ public class TestScanDeleteTracker exten sdt.add(qualifier, 0, qualifier.length, timestamp, deleteType); timestamp -= 5; - boolean ret = sdt.isDeleted(qualifier, 0, qualifier.length, timestamp); - assertEquals(true, ret); + DeleteResult ret = sdt.isDeleted(qualifier, 0, qualifier.length, timestamp); + assertEquals(DeleteResult.COLUMN_DELETED, ret); } @@ -93,8 +94,8 @@ public class TestScanDeleteTracker exten deleteType = KeyValue.Type.Delete.getCode(); sdt.add(qualifier, 0, qualifier.length, timestamp, deleteType); - boolean ret = sdt.isDeleted(qualifier, 0, qualifier.length, timestamp); - assertEquals(true, ret); + DeleteResult ret = sdt.isDeleted(qualifier, 0, qualifier.length, timestamp); + assertEquals( DeleteResult.VERSION_DELETED, ret); } //Testing new way where we save the Delete in case of a Delete for specific @@ -109,5 +110,4 @@ public class TestScanDeleteTracker exten 
assertEquals(false ,sdt.isEmpty()); } - }