hbase git commit: HBASE-15236 Inconsistent cell reads over multiple bulk-loaded HFiles. In KeyValueHeap, if two cells are same i.e. have same key and timestamp, then instead of directly using seq id t
Repository: hbase Updated Branches: refs/heads/branch-1 cadc4cf15 -> a6f8214e9 HBASE-15236 Inconsistent cell reads over multiple bulk-loaded HFiles. In KeyValueHeap, if two cells are same i.e. have same key and timestamp, then instead of directly using seq id to determine newer one, we should use StoreFile.Comparater.SEQ_ID because that's what is used to determine order of hfiles. In this patch, we assign each scanner an order based on it's index in storefiles list, which is then used in KeyValueHeap to disambiguate between same cells. Changes the getSequenceId() in KeyValueScanner class to getScannerOrder(). Testing: Adds unit test to TestKeyValueHeap. Manual testing: Three cases (Tables t, t2, t3 in the jira description), single region, 2 hfiles with same seq id, timestamps and duplicate KVs. Made sure that returned kv was same for get and scan. (Apekshit) Change-Id: I22600c91c0a51fb63eb17db73472839d2f13957c Signed-off-by: stackProject: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/a6f8214e Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/a6f8214e Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/a6f8214e Branch: refs/heads/branch-1 Commit: a6f8214e9b8cce79edcaa12bc026d9ef7f77970f Parents: cadc4cf Author: Apekshit Authored: Tue Feb 23 00:31:18 2016 -0800 Committer: stack Committed: Thu May 12 21:31:13 2016 -0700 -- .../hbase/regionserver/DefaultMemStore.java | 7 +- .../hadoop/hbase/regionserver/KeyValueHeap.java | 20 +- .../hbase/regionserver/KeyValueScanner.java | 12 +- .../hadoop/hbase/regionserver/StoreFile.java| 52 ++-- .../hbase/regionserver/StoreFileScanner.java| 38 ++- .../hadoop/hbase/regionserver/StoreScanner.java | 5 +- .../hbase/util/CollectionBackedScanner.java | 5 +- .../hbase/regionserver/TestKeyValueHeap.java| 268 +++ .../hbase/regionserver/TestStoreFile.java | 4 +- .../regionserver/compactions/TestCompactor.java | 2 +- .../compactions/TestStripeCompactionPolicy.java | 3 +- 11 files changed, 198 insertions(+), 218 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hbase/blob/a6f8214e/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java -- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java index 05041f5..ce793a9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java @@ -939,11 +939,12 @@ public class DefaultMemStore implements MemStore { } /** - * MemStoreScanner returns max value as sequence id because it will - * always have the latest data among all files. + * MemStoreScanner returns Long.MAX_VALUE because it will always have the latest data among all + * scanners. + * @see KeyValueScanner#getScannerOrder() */ @Override -public long getSequenceID() { +public long getScannerOrder() { return Long.MAX_VALUE; } http://git-wip-us.apache.org/repos/asf/hbase/blob/a6f8214e/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java -- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java index ac76bfd..4b77e179 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java @@ -182,17 +182,10 @@ public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner if (comparison != 0) { return comparison; } else { -// Since both the keys are exactly the same, we break the tie in favor -// of the key which came latest. -long leftSequenceID = left.getSequenceID(); -long rightSequenceID = right.getSequenceID(); -if (leftSequenceID > rightSequenceID) { - return -1; -} else if (leftSequenceID < rightSequenceID) { - return 1; -} else { - return 0; -} +// Since both the keys are exactly the same, we break the tie in favor of higher ordered +// scanner since it'll have newer data. Since higher value should come first, we reverse +// sort here. +return Long.compare(right.getScannerOrder(), left.getScannerOrder()); }
hbase git commit: HBASE-15236 Inconsistent cell reads over multiple bulk-loaded HFiles. In KeyValueHeap, if two cells are same i.e. have same key and timestamp, then instead of directly using seq id t
Repository: hbase Updated Branches: refs/heads/master a6e29676d -> 11740570c HBASE-15236 Inconsistent cell reads over multiple bulk-loaded HFiles. In KeyValueHeap, if two cells are same i.e. have same key and timestamp, then instead of directly using seq id to determine newer one, we should use StoreFile.Comparater.SEQ_ID because that's what is used to determine order of hfiles. In this patch, we assign each scanner an order based on it's index in storefiles list, which is then used in KeyValueHeap to disambiguate between same cells. Changes the getSequenceId() in KeyValueScanner class to getScannerOrder(). Testing: Adds unit test to TestKeyValueHeap. Manual testing: Three cases (Tables t, t2, t3 in the jira description), single region, 2 hfiles with same seq id, timestamps and duplicate KVs. Made sure that returned kv was same for get and scan. (Apekshit) Change-Id: I22600c91c0a51fb63eb17db73472839d2f13957c Signed-off-by: stackProject: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/11740570 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/11740570 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/11740570 Branch: refs/heads/master Commit: 11740570c1440254a76fae67d318c6a852cb56b8 Parents: a6e2967 Author: Apekshit Authored: Tue Feb 23 00:31:18 2016 -0800 Committer: stack Committed: Mon May 9 16:57:06 2016 -0700 -- .../hadoop/hbase/regionserver/KeyValueHeap.java | 20 +- .../hbase/regionserver/KeyValueScanner.java | 12 +- .../hbase/regionserver/MemStoreScanner.java | 7 +- .../hbase/regionserver/SegmentScanner.java | 35 +-- .../hadoop/hbase/regionserver/StoreFile.java| 6 +- .../hbase/regionserver/StoreFileReader.java | 43 +-- .../hbase/regionserver/StoreFileScanner.java| 38 ++- .../hadoop/hbase/regionserver/StoreScanner.java | 5 +- .../hbase/util/CollectionBackedScanner.java | 5 +- .../hbase/regionserver/TestKeyValueHeap.java| 269 +++ .../hbase/regionserver/TestStoreFile.java | 2 +- .../regionserver/compactions/TestCompactor.java | 2 +- .../compactions/TestStripeCompactionPolicy.java | 3 +- 13 files changed, 212 insertions(+), 235 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hbase/blob/11740570/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java -- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java index 89fc8fb..9ece14b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java @@ -189,17 +189,10 @@ public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner if (comparison != 0) { return comparison; } else { -// Since both the keys are exactly the same, we break the tie in favor -// of the key which came latest. -long leftSequenceID = left.getSequenceID(); -long rightSequenceID = right.getSequenceID(); -if (leftSequenceID > rightSequenceID) { - return -1; -} else if (leftSequenceID < rightSequenceID) { - return 1; -} else { - return 0; -} +// Since both the keys are exactly the same, we break the tie in favor of higher ordered +// scanner since it'll have newer data. Since higher value should come first, we reverse +// sort here. +return Long.compare(right.getScannerOrder(), left.getScannerOrder()); } } /** @@ -406,8 +399,11 @@ public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner return this.heap; } + /** + * @see KeyValueScanner#getScannerOrder() + */ @Override - public long getSequenceID() { + public long getScannerOrder() { return 0; } http://git-wip-us.apache.org/repos/asf/hbase/blob/11740570/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java -- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java index ed86a83..44b081b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java @@ -70,13 +70,13 @@ public interface KeyValueScanner extends Shipper, Closeable { boolean reseek(Cell key)