hbase git commit: HBASE-15236 Inconsistent cell reads over multiple bulk-loaded HFiles. In KeyValueHeap, if two cells are same i.e. have same key and timestamp, then instead of directly using seq id t

2016-05-12 Thread stack
Repository: hbase
Updated Branches:
  refs/heads/branch-1 cadc4cf15 -> a6f8214e9


HBASE-15236 Inconsistent cell reads over multiple bulk-loaded HFiles. In 
KeyValueHeap, if two cells are the same, i.e. have the same key and timestamp, 
then instead of directly using seq id to determine the newer one, we should use 
StoreFile.Comparators.SEQ_ID because that's what is used to determine the order 
of hfiles. In this patch, we assign each scanner an order based on its index in 
the storefiles list, which is then used in KeyValueHeap to disambiguate between 
identical cells. Changes getSequenceID() in the KeyValueScanner class to 
getScannerOrder(). Testing: Adds unit test to TestKeyValueHeap. Manual testing: 
Three cases (Tables t, t2, t3 in the jira description), single region, 2 hfiles 
with same seq id, timestamps and duplicate KVs. Made sure that the returned kv 
was the same for get and scan. (Apekshit)

Change-Id: I22600c91c0a51fb63eb17db73472839d2f13957c

Signed-off-by: stack 


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/a6f8214e
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/a6f8214e
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/a6f8214e

Branch: refs/heads/branch-1
Commit: a6f8214e9b8cce79edcaa12bc026d9ef7f77970f
Parents: cadc4cf
Author: Apekshit 
Authored: Tue Feb 23 00:31:18 2016 -0800
Committer: stack 
Committed: Thu May 12 21:31:13 2016 -0700

--
 .../hbase/regionserver/DefaultMemStore.java |   7 +-
 .../hadoop/hbase/regionserver/KeyValueHeap.java |  20 +-
 .../hbase/regionserver/KeyValueScanner.java |  12 +-
 .../hadoop/hbase/regionserver/StoreFile.java|  52 ++--
 .../hbase/regionserver/StoreFileScanner.java|  38 ++-
 .../hadoop/hbase/regionserver/StoreScanner.java |   5 +-
 .../hbase/util/CollectionBackedScanner.java |   5 +-
 .../hbase/regionserver/TestKeyValueHeap.java| 268 +++
 .../hbase/regionserver/TestStoreFile.java   |   4 +-
 .../regionserver/compactions/TestCompactor.java |   2 +-
 .../compactions/TestStripeCompactionPolicy.java |   3 +-
 11 files changed, 198 insertions(+), 218 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hbase/blob/a6f8214e/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java
--
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java
index 05041f5..ce793a9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultMemStore.java
@@ -939,11 +939,12 @@ public class DefaultMemStore implements MemStore {
 }
 
 /**
- * MemStoreScanner returns max value as sequence id because it will
- * always have the latest data among all files.
+ * MemStoreScanner returns Long.MAX_VALUE because it will always have the latest data among all
+ * scanners.
+ * @see KeyValueScanner#getScannerOrder()
  */
 @Override
-public long getSequenceID() {
+public long getScannerOrder() {
   return Long.MAX_VALUE;
 }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/a6f8214e/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java
--
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java
index ac76bfd..4b77e179 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java
@@ -182,17 +182,10 @@ public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner
   if (comparison != 0) {
 return comparison;
   } else {
-// Since both the keys are exactly the same, we break the tie in favor
-// of the key which came latest.
-long leftSequenceID = left.getSequenceID();
-long rightSequenceID = right.getSequenceID();
-if (leftSequenceID > rightSequenceID) {
-  return -1;
-} else if (leftSequenceID < rightSequenceID) {
-  return 1;
-} else {
-  return 0;
-}
+// Since both the keys are exactly the same, we break the tie in favor of higher ordered
+// scanner since it'll have newer data. Since higher value should come first, we reverse
+// sort here.
+return Long.compare(right.getScannerOrder(), left.getScannerOrder());
   }
 }
 /**
@@ -392,8 +385,11 @@ public class KeyValueHeap 

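To see the tie-break in isolation, here is a minimal, self-contained Java sketch of
the comparator logic above. The Scanner class and sample values are illustrative
stand-ins, not HBase's actual types; HBase's KVScannerComparator compares full cell
keys first and only falls back to scanner order on an exact tie.

import java.util.Comparator;
import java.util.PriorityQueue;

public class ScannerOrderDemo {
  // Minimal stand-in for a scanner: a key plus an order assigned from the storefile list.
  static final class Scanner {
    final String currentKey;
    final long scannerOrder; // higher order == newer data; memstore uses Long.MAX_VALUE
    Scanner(String currentKey, long scannerOrder) {
      this.currentKey = currentKey;
      this.scannerOrder = scannerOrder;
    }
  }

  public static void main(String[] args) {
    // Compare keys first; on an exact tie, prefer the higher-ordered scanner.
    // Long.compare(right, left) reverse-sorts so the newest scanner polls first.
    Comparator<Scanner> cmp = (left, right) -> {
      int comparison = left.currentKey.compareTo(right.currentKey);
      if (comparison != 0) {
        return comparison;
      }
      return Long.compare(right.scannerOrder, left.scannerOrder);
    };

    PriorityQueue<Scanner> heap = new PriorityQueue<>(cmp);
    heap.add(new Scanner("row1/cf:q/ts=5", 0));               // older bulk-loaded hfile
    heap.add(new Scanner("row1/cf:q/ts=5", 1));               // newer hfile, same key and ts
    heap.add(new Scanner("row1/cf:q/ts=5", Long.MAX_VALUE));  // memstore always sorts first

    System.out.println(heap.poll().scannerOrder); // 9223372036854775807 (memstore)
    System.out.println(heap.poll().scannerOrder); // 1 (the newer hfile)
  }
}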
hbase git commit: HBASE-15808 Reduce potential bulk load intermediate space usage and waste

2016-05-12 Thread jerryjch
Repository: hbase
Updated Branches:
  refs/heads/master 1267f76e9 -> acca95fb5


HBASE-15808 Reduce potential bulk load intermediate space usage and waste


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/acca95fb
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/acca95fb
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/acca95fb

Branch: refs/heads/master
Commit: acca95fb5046a52ad26ca84fae1adeab932472a6
Parents: 1267f76
Author: Jerry He 
Authored: Thu May 12 15:43:48 2016 -0700
Committer: Jerry He 
Committed: Thu May 12 15:43:48 2016 -0700

--
 .../hbase/mapreduce/LoadIncrementalHFiles.java  | 26 --
 .../TestLoadIncrementalHFilesSplitRecovery.java | 38 
 2 files changed, 61 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hbase/blob/acca95fb/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
--
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
index 86a84a4..0084878 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
@@ -118,6 +118,10 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
  private static final String ASSIGN_SEQ_IDS = "hbase.mapreduce.bulkload.assign.sequenceNumbers";
   public final static String CREATE_TABLE_CONF_KEY = "create.table";
 
+  // We use a '.' prefix which is ignored when walking directory trees
+  // above. It is invalid family name.
+  final static String TMP_DIR = ".tmp";
+
   private int maxFilesPerRegionPerFamily;
   private boolean assignSeqIds;
 
@@ -201,6 +205,14 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
   }
   Path familyDir = familyStat.getPath();
   byte[] familyName = familyDir.getName().getBytes();
+  // Skip invalid family
+  try {
+HColumnDescriptor.isLegalFamilyName(familyName);
+  }
+  catch (IllegalArgumentException e) {
+LOG.warn("Skipping invalid " + familyStat.getPath());
+continue;
+  }
   TFamily family = visitor.bulkFamily(familyName);
 
   FileStatus[] hfileStatuses = fs.listStatus(familyDir);
@@ -632,9 +644,6 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
   byte[] splitKey) throws IOException {
 final Path hfilePath = item.hfilePath;
 
-// We use a '_' prefix which is ignored when walking directory trees
-// above.
-final String TMP_DIR = "_tmp";
 Path tmpDir = item.hfilePath.getParent();
 if (!tmpDir.getName().equals(TMP_DIR)) {
   tmpDir = new Path(tmpDir, TMP_DIR);
@@ -661,6 +670,17 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
 lqis.add(new LoadQueueItem(item.family, botOut));
 lqis.add(new LoadQueueItem(item.family, topOut));
 
+// If the current item is already the result of previous splits,
+// we don't need it anymore. Clean up to save space.
+// It is not part of the original input files.
+try {
+  tmpDir = item.hfilePath.getParent();
+  if (tmpDir.getName().equals(TMP_DIR)) {
+fs.delete(item.hfilePath, false);
+  }
+} catch (IOException e) {
+  LOG.warn("Unable to delete temporary split file " + item.hfilePath);
+}
 LOG.info("Successfully split into new HFiles " + botOut + " and " + 
topOut);
 return lqis;
   }
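
The cleanup above only deletes an hfile whose parent directory is the .tmp split
directory, so original input files are never removed. A self-contained sketch of
that guard against the Hadoop FileSystem API (the paths below are invented for
illustration):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SplitCleanupDemo {
  static final String TMP_DIR = ".tmp";

  // Delete an hfile only if it is an intermediate split output, i.e. lives under .tmp.
  static void cleanupIfIntermediate(FileSystem fs, Path hfilePath) {
    try {
      Path parent = hfilePath.getParent();
      if (parent != null && parent.getName().equals(TMP_DIR)) {
        fs.delete(hfilePath, false); // false: not recursive, it is a single file
      }
    } catch (IOException e) {
      // Best effort: failing to clean up wastes space but must not fail the load.
      System.err.println("Unable to delete temporary split file " + hfilePath);
    }
  }

  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    cleanupIfIntermediate(fs, new Path("/bulk/cf1/.tmp/hfile.bottom")); // deleted if present
    cleanupIfIntermediate(fs, new Path("/bulk/cf1/hfile-original"));    // never touched
  }
}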

http://git-wip-us.apache.org/repos/asf/hbase/blob/acca95fb/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
--
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
index 32e3058..0975fd2 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hbase.mapreduce;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -63,6 +64,7 @@ import org.apache.hadoop.hbase.regionserver.TestHRegionServerBulkLoad;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org

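An aside on the family-name filter added earlier in this patch: the '.'-prefixed
temp directory is "ignored when walking directory trees above" precisely because
'.' is not a legal leading character for a column family name, so the
isLegalFamilyName check rejects it and the loop moves on. A minimal sketch of that
filtering idea (the check below is a simplified stand-in for
HColumnDescriptor.isLegalFamilyName, not the real validation):

import java.util.Arrays;
import java.util.List;

public class FamilyDirFilterDemo {
  // Simplified stand-in: the real check also rejects control characters, ':', and more.
  static void checkLegalFamilyName(String name) {
    if (name.startsWith(".")) {
      throw new IllegalArgumentException("Family names cannot start with '.': " + name);
    }
  }

  public static void main(String[] args) {
    List<String> familyDirs = Arrays.asList("cf1", ".tmp", "cf2");
    for (String dir : familyDirs) {
      try {
        checkLegalFamilyName(dir);
      } catch (IllegalArgumentException e) {
        System.out.println("Skipping invalid " + dir); // ".tmp" is skipped here
        continue;
      }
      System.out.println("Visiting family " + dir);
    }
  }
}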
hbase git commit: HBASE-15808 Reduce potential bulk load intermediate space usage and waste

2016-05-12 Thread jerryjch
Repository: hbase
Updated Branches:
  refs/heads/branch-1.3 921f745b2 -> 0b59341d2


HBASE-15808 Reduce potential bulk load intermediate space usage and waste


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/0b59341d
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/0b59341d
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/0b59341d

Branch: refs/heads/branch-1.3
Commit: 0b59341d2442f9d3ac591676f3277eb3a887005b
Parents: 921f745
Author: Jerry He 
Authored: Thu May 12 15:22:56 2016 -0700
Committer: Jerry He 
Committed: Thu May 12 15:29:56 2016 -0700

--
 .../hbase/mapreduce/LoadIncrementalHFiles.java  | 26 --
 .../TestLoadIncrementalHFilesSplitRecovery.java | 36 
 2 files changed, 59 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hbase/blob/0b59341d/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
--
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
index 07059bc..0893b2e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
@@ -121,6 +121,10 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
  private static final String ASSIGN_SEQ_IDS = "hbase.mapreduce.bulkload.assign.sequenceNumbers";
   public final static String CREATE_TABLE_CONF_KEY = "create.table";
 
+  // We use a '.' prefix which is ignored when walking directory trees
+  // above. It is invalid family name.
+  final static String TMP_DIR = ".tmp";
+
   private int maxFilesPerRegionPerFamily;
   private boolean assignSeqIds;
 
@@ -203,6 +207,14 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
   }
   Path familyDir = familyStat.getPath();
   byte[] familyName = familyDir.getName().getBytes();
+  // Skip invalid family
+  try {
+HColumnDescriptor.isLegalFamilyName(familyName);
+  }
+  catch (IllegalArgumentException e) {
+LOG.warn("Skipping invalid " + familyStat.getPath());
+continue;
+  }
   TFamily family = visitor.bulkFamily(familyName);
 
   FileStatus[] hfileStatuses = fs.listStatus(familyDir);
@@ -660,9 +672,6 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
   byte[] splitKey) throws IOException {
 final Path hfilePath = item.hfilePath;
 
-// We use a '_' prefix which is ignored when walking directory trees
-// above.
-final String TMP_DIR = "_tmp";
 Path tmpDir = item.hfilePath.getParent();
 if (!tmpDir.getName().equals(TMP_DIR)) {
   tmpDir = new Path(tmpDir, TMP_DIR);
@@ -689,6 +698,17 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
 lqis.add(new LoadQueueItem(item.family, botOut));
 lqis.add(new LoadQueueItem(item.family, topOut));
 
+// If the current item is already the result of previous splits,
+// we don't need it anymore. Clean up to save space.
+// It is not part of the original input files.
+try {
+  tmpDir = item.hfilePath.getParent();
+  if (tmpDir.getName().equals(TMP_DIR)) {
+fs.delete(item.hfilePath, false);
+  }
+} catch (IOException e) {
+  LOG.warn("Unable to delete temporary split file " + item.hfilePath);
+}
 LOG.info("Successfully split into new HFiles " + botOut + " and " + 
topOut);
 return lqis;
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/0b59341d/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
--
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
index e3024d0..26583f3 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hbase.mapreduce;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -61,6 +62,7 @@ import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.BulkLoadHFileRequ
 import org.apache.hadoop.hbase.regionserver.HRegionServ

hbase git commit: HBASE-15808 Reduce potential bulk load intermediate space usage and waste

2016-05-12 Thread jerryjch
Repository: hbase
Updated Branches:
  refs/heads/branch-1.2 258d96021 -> 6e3770b71


HBASE-15808 Reduce potential bulk load intermediate space usage and waste


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/6e3770b7
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/6e3770b7
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/6e3770b7

Branch: refs/heads/branch-1.2
Commit: 6e3770b7167ca1b54380603c2e1cf3f232005680
Parents: 258d960
Author: Jerry He 
Authored: Thu May 12 15:22:56 2016 -0700
Committer: Jerry He 
Committed: Thu May 12 15:23:57 2016 -0700

--
 .../hbase/mapreduce/LoadIncrementalHFiles.java  | 26 --
 .../TestLoadIncrementalHFilesSplitRecovery.java | 36 
 2 files changed, 59 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hbase/blob/6e3770b7/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
--
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
index 9630a35..15444ff 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
@@ -123,6 +123,10 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
  private static final String ASSIGN_SEQ_IDS = "hbase.mapreduce.bulkload.assign.sequenceNumbers";
   public final static String CREATE_TABLE_CONF_KEY = "create.table";
 
+  // We use a '.' prefix which is ignored when walking directory trees
+  // above. It is invalid family name.
+  final static String TMP_DIR = ".tmp";
+
   private int maxFilesPerRegionPerFamily;
   private boolean assignSeqIds;
 
@@ -202,6 +206,14 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
   }
   Path familyDir = familyStat.getPath();
   byte[] familyName = familyDir.getName().getBytes();
+  // Skip invalid family
+  try {
+HColumnDescriptor.isLegalFamilyName(familyName);
+  }
+  catch (IllegalArgumentException e) {
+LOG.warn("Skipping invalid " + familyStat.getPath());
+continue;
+  }
   TFamily family = visitor.bulkFamily(familyName);
 
   FileStatus[] hfileStatuses = fs.listStatus(familyDir);
@@ -611,9 +623,6 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
   byte[] splitKey) throws IOException {
 final Path hfilePath = item.hfilePath;
 
-// We use a '_' prefix which is ignored when walking directory trees
-// above.
-final String TMP_DIR = "_tmp";
 Path tmpDir = item.hfilePath.getParent();
 if (!tmpDir.getName().equals(TMP_DIR)) {
   tmpDir = new Path(tmpDir, TMP_DIR);
@@ -640,6 +649,17 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
 lqis.add(new LoadQueueItem(item.family, botOut));
 lqis.add(new LoadQueueItem(item.family, topOut));
 
+// If the current item is already the result of previous splits,
+// we don't need it anymore. Clean up to save space.
+// It is not part of the original input files.
+try {
+  tmpDir = item.hfilePath.getParent();
+  if (tmpDir.getName().equals(TMP_DIR)) {
+fs.delete(item.hfilePath, false);
+  }
+} catch (IOException e) {
+  LOG.warn("Unable to delete temporary split file " + item.hfilePath);
+}
 LOG.info("Successfully split into new HFiles " + botOut + " and " + 
topOut);
 return lqis;
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/6e3770b7/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
--
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
index e3024d0..26583f3 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hbase.mapreduce;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -61,6 +62,7 @@ import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.BulkLoadHFileRequ
 import org.apache.hadoop.hbase.regionserver.HRegionServ

hbase git commit: HBASE-15808 Reduce potential bulk load intermediate space usage and waste

2016-05-12 Thread jerryjch
Repository: hbase
Updated Branches:
  refs/heads/branch-1 98d13c745 -> cadc4cf15


HBASE-15808 Reduce potential bulk load intermediate space usage and waste


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/cadc4cf1
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/cadc4cf1
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/cadc4cf1

Branch: refs/heads/branch-1
Commit: cadc4cf15bc548e853332cc5fd9a116738d71d8f
Parents: 98d13c7
Author: Jerry He 
Authored: Thu May 12 15:22:56 2016 -0700
Committer: Jerry He 
Committed: Thu May 12 15:22:56 2016 -0700

--
 .../hbase/mapreduce/LoadIncrementalHFiles.java  | 26 --
 .../TestLoadIncrementalHFilesSplitRecovery.java | 36 
 2 files changed, 59 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hbase/blob/cadc4cf1/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
--
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
index 07059bc..0893b2e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
@@ -121,6 +121,10 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
  private static final String ASSIGN_SEQ_IDS = "hbase.mapreduce.bulkload.assign.sequenceNumbers";
   public final static String CREATE_TABLE_CONF_KEY = "create.table";
 
+  // We use a '.' prefix which is ignored when walking directory trees
+  // above. It is invalid family name.
+  final static String TMP_DIR = ".tmp";
+
   private int maxFilesPerRegionPerFamily;
   private boolean assignSeqIds;
 
@@ -203,6 +207,14 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
   }
   Path familyDir = familyStat.getPath();
   byte[] familyName = familyDir.getName().getBytes();
+  // Skip invalid family
+  try {
+HColumnDescriptor.isLegalFamilyName(familyName);
+  }
+  catch (IllegalArgumentException e) {
+LOG.warn("Skipping invalid " + familyStat.getPath());
+continue;
+  }
   TFamily family = visitor.bulkFamily(familyName);
 
   FileStatus[] hfileStatuses = fs.listStatus(familyDir);
@@ -660,9 +672,6 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
   byte[] splitKey) throws IOException {
 final Path hfilePath = item.hfilePath;
 
-// We use a '_' prefix which is ignored when walking directory trees
-// above.
-final String TMP_DIR = "_tmp";
 Path tmpDir = item.hfilePath.getParent();
 if (!tmpDir.getName().equals(TMP_DIR)) {
   tmpDir = new Path(tmpDir, TMP_DIR);
@@ -689,6 +698,17 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
 lqis.add(new LoadQueueItem(item.family, botOut));
 lqis.add(new LoadQueueItem(item.family, topOut));
 
+// If the current item is already the result of previous splits,
+// we don't need it anymore. Clean up to save space.
+// It is not part of the original input files.
+try {
+  tmpDir = item.hfilePath.getParent();
+  if (tmpDir.getName().equals(TMP_DIR)) {
+fs.delete(item.hfilePath, false);
+  }
+} catch (IOException e) {
+  LOG.warn("Unable to delete temporary split file " + item.hfilePath);
+}
 LOG.info("Successfully split into new HFiles " + botOut + " and " + 
topOut);
 return lqis;
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/cadc4cf1/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
--
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
index e3024d0..26583f3 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFilesSplitRecovery.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hbase.mapreduce;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -61,6 +62,7 @@ import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.BulkLoadHFileRequ
 import org.apache.hadoop.hbase.regionserver.HRegionServer;

hbase git commit: HBASE-15590 Revert pending review comment

2016-05-12 Thread tedyu
Repository: hbase
Updated Branches:
  refs/heads/HBASE-7912 1532bb0dd -> db680d572


HBASE-15590 Revert pending review comment


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/db680d57
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/db680d57
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/db680d57

Branch: refs/heads/HBASE-7912
Commit: db680d572d0640a3f681f8a3ee3f8851a8dee189
Parents: 1532bb0
Author: tedyu 
Authored: Thu May 12 14:35:15 2016 -0700
Committer: tedyu 
Committed: Thu May 12 14:35:15 2016 -0700

--
 .../BaseMasterAndRegionObserver.java| 14 ---
 .../hbase/coprocessor/BaseMasterObserver.java   | 14 ---
 .../hbase/coprocessor/MasterObserver.java   | 26 
 .../hbase/master/MasterCoprocessorHost.java | 24 --
 .../hadoop/hbase/master/MasterRpcServices.java  | 20 +++
 .../hadoop/hbase/master/MasterServices.java |  2 +-
 .../hbase/security/access/AccessController.java | 14 ---
 .../visibility/VisibilityController.java| 14 ---
 .../hbase/coprocessor/TestMasterObserver.java   | 15 ---
 .../security/access/TestAccessController.java   | 17 -
 10 files changed, 4 insertions(+), 156 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hbase/blob/db680d57/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/BaseMasterAndRegionObserver.java
--
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/BaseMasterAndRegionObserver.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/BaseMasterAndRegionObserver.java
index 5fea0c0..428840c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/BaseMasterAndRegionObserver.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/BaseMasterAndRegionObserver.java
@@ -31,7 +31,6 @@ import org.apache.hadoop.hbase.NamespaceDescriptor;
 import org.apache.hadoop.hbase.ProcedureInfo;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.backup.BackupType;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.client.Admin;
@@ -40,7 +39,6 @@ import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
 import org.apache.hadoop.hbase.protobuf.generated.QuotaProtos.Quotas;
-import org.apache.hadoop.hbase.util.Pair;
 
 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.COPROC)
 @InterfaceStability.Evolving
@@ -437,18 +435,6 @@ public class BaseMasterAndRegionObserver extends BaseRegionObserver
   }
 
   @Override
-  public void preBackupTables(final ObserverContext ctx,
-  final BackupType type, final List tablesList, final String targetRootDir,
-  final int workers, final long bandwidth) throws IOException {
-  }
-
-  @Override
-  public void postBackupTables(final ObserverContext ctx,
-  final BackupType type, final List tablesList, final Pair pair)
-  throws IOException {
-  }
-
-  @Override
   public void preBalance(ObserverContext ctx)
   throws IOException {
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/db680d57/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/BaseMasterObserver.java
--
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/BaseMasterObserver.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/BaseMasterObserver.java
index 0466193..7da63bf 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/BaseMasterObserver.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/BaseMasterObserver.java
@@ -31,7 +31,6 @@ import org.apache.hadoop.hbase.NamespaceDescriptor;
 import org.apache.hadoop.hbase.ProcedureInfo;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.backup.BackupType;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.client.Admin;
@@ -40,7 +39,6 @@ import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
 import org.apache.hadoop.hbase.protobuf.generated.QuotaProtos.Quotas;
-impor

hbase git commit: HBASE-15817 Backup history should mention the type (full or incremental) of the backup

2016-05-12 Thread tedyu
Repository: hbase
Updated Branches:
  refs/heads/HBASE-7912 d269a1419 -> 1532bb0dd


HBASE-15817 Backup history should mention the type (full or incremental) of the backup


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/1532bb0d
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/1532bb0d
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/1532bb0d

Branch: refs/heads/HBASE-7912
Commit: 1532bb0dd0e248d7ee075bcff0621da818d06659
Parents: d269a14
Author: tedyu 
Authored: Thu May 12 12:52:07 2016 -0700
Committer: tedyu 
Committed: Thu May 12 12:52:07 2016 -0700

--
 .../src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java| 1 +
 1 file changed, 1 insertion(+)
--


http://git-wip-us.apache.org/repos/asf/hbase/blob/1532bb0d/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java
--
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java
index 1ed95f4..784cff6 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java
@@ -438,6 +438,7 @@ public class BackupInfo implements Comparable {
   public String getShortDescription() {
 StringBuilder sb = new StringBuilder();
 sb.append("ID : " + backupId).append("\n");
+sb.append("Type   : " + getType()).append("\n");
 sb.append("Tables : " + getTableListAsString()).append("\n");
 sb.append("State  : " + getState()).append("\n");
 Date date = null;

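The one-line change above adds the backup type to the summary. A rough sketch of
what getShortDescription() builds after this patch, mirroring its StringBuilder
appends (the field values here are invented for illustration):

public class BackupInfoDescriptionDemo {
  public static void main(String[] args) {
    StringBuilder sb = new StringBuilder();
    sb.append("ID : " + "backup_1463086327000").append("\n");
    sb.append("Type   : " + "FULL").append("\n");   // the new line from this patch
    sb.append("Tables : " + "t1;t2").append("\n");
    sb.append("State  : " + "COMPLETE").append("\n");
    System.out.print(sb);
  }
}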


hbase git commit: HBASE-15774 Fix Upgrade lock usage in connection pool.

2016-05-12 Thread eclark
Repository: hbase
Updated Branches:
  refs/heads/HBASE-14850 a72c2db64 -> b434d0eaf


HBASE-15774 Fix Upgrade lock usage in connection pool.

Summary:
Use the upgrade lock better.
Fix a clang warning around initializing the UpgradeHolder incorrectly.
Remove dead code. (We'll need to add it back when there's a better plan.)
Add more comments.

Test Plan: buck test --all

Differential Revision: https://reviews.facebook.net/D58005


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/b434d0ea
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/b434d0ea
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/b434d0ea

Branch: refs/heads/HBASE-14850
Commit: b434d0eafbc84b8f5dacfbd20a927a8cb6dd6372
Parents: a72c2db
Author: Elliott Clark 
Authored: Tue May 10 17:44:41 2016 -0700
Committer: Elliott Clark 
Committed: Thu May 12 10:29:50 2016 -0700

--
 .../connection/connection-pool-test.cc  | 12 ++--
 .../connection/connection-pool.cc   | 47 +++---
 .../connection/connection-pool.h|  6 +-
 hbase-native-client/core/client.h   |  2 +-
 hbase-native-client/core/location-cache.cc  | 68 
 5 files changed, 62 insertions(+), 73 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hbase/blob/b434d0ea/hbase-native-client/connection/connection-pool-test.cc
--
diff --git a/hbase-native-client/connection/connection-pool-test.cc b/hbase-native-client/connection/connection-pool-test.cc
index c0c346f..bd2d585 100644
--- a/hbase-native-client/connection/connection-pool-test.cc
+++ b/hbase-native-client/connection/connection-pool-test.cc
@@ -79,9 +79,9 @@ TEST(TestConnectionPool, TestOnlyCreateOnce) {
   sn.set_host_name(hostname);
   sn.set_port(port);
 
-  auto result = cp.get(sn);
+  auto result = cp.Get(sn);
   ASSERT_TRUE(result != nullptr);
-  result = cp.get(sn);
+  result = cp.Get(sn);
 }
 
 TEST(TestConnectionPool, TestOnlyCreateMultipleDispose) {
@@ -102,13 +102,13 @@ TEST(TestConnectionPool, TestOnlyCreateMultipleDispose) {
   ConnectionPool cp{mock_cf};
 
   {
-auto result_one = cp.get(folly::to(
+auto result_one = cp.Get(folly::to(
 hostname_one + ":" + folly::to(port)));
-auto result_two = cp.get(folly::to(
+auto result_two = cp.Get(folly::to(
 hostname_two + ":" + folly::to(port)));
   }
-  auto result_one = cp.get(
+  auto result_one = cp.Get(
  folly::to(hostname_one + ":" + folly::to(port)));
-  auto result_two = cp.get(
+  auto result_two = cp.Get(
  folly::to(hostname_two + ":" + folly::to(port)));
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/b434d0ea/hbase-native-client/connection/connection-pool.cc
--
diff --git a/hbase-native-client/connection/connection-pool.cc b/hbase-native-client/connection/connection-pool.cc
index 90e2056..aa3d094 100644
--- a/hbase-native-client/connection/connection-pool.cc
+++ b/hbase-native-client/connection/connection-pool.cc
@@ -48,25 +48,54 @@ ConnectionPool::~ConnectionPool() {
   clients_.clear();
 }
 
-std::shared_ptr ConnectionPool::get(const ServerName &sn) {
-  // Create a read lock.
-  SharedMutexWritePriority::UpgradeHolder holder(map_mutex_);
+std::shared_ptr ConnectionPool::Get(const ServerName &sn) {
+  // Try and get the cached connection.
+  auto found_ptr = GetCached(sn);
 
+  // If there's no connection then create it.
+  if (found_ptr == nullptr) {
+found_ptr = GetNew(sn);
+  }
+  return found_ptr;
+}
+
+std::shared_ptr ConnectionPool::GetCached(const ServerName &sn) {
+  SharedMutexWritePriority::ReadHolder holder(map_mutex_);
   auto found = connections_.find(sn);
-  if (found == connections_.end() || found->second == nullptr) {
-// Move the upgradable lock into the write lock if the connection
-// hasn't been found.
-SharedMutexWritePriority::WriteHolder holder(std::move(holder));
+  if (found == connections_.end()) {
+return nullptr;
+  }
+  return found->second;
+}
+
+std::shared_ptr ConnectionPool::GetNew(const ServerName &sn) {
+  // Grab the upgrade lock. While we are double checking other readers can
+  // continue on
+  SharedMutexWritePriority::UpgradeHolder u_holder{map_mutex_};
+
+  // Now check if someone else created the connection before we got the lock
+  // This is safe since we hold the upgrade lock.
+  // upgrade lock is more powerful than the reader lock.
+  auto found = connections_.find(sn);
+  if (found != connections_.end() && found->second != nullptr) {
+return found->second;
+  } else {
+// Yeah it looks a lot like there's no connection
+SharedMutexWritePriority::WriteHolder w_holder{std::move(u_holder)};
+
+// Make double sure there a
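
The shape of GetNew() above — fast-path read, then an upgrade lock to re-check
before taking the write lock — has a rough Java analogue in StampedLock's lock
conversion. A minimal sketch, assuming a simple string-keyed pool (the types below
are placeholders, not the HBase native client's):

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.locks.StampedLock;

public class ConnectionPoolSketch {
  private final Map<String, Object> connections = new HashMap<>();
  private final StampedLock lock = new StampedLock();

  public Object get(String serverName) {
    // Fast path: plain read lock for the common cache hit.
    long stamp = lock.readLock();
    try {
      Object conn = connections.get(serverName);
      if (conn != null) {
        return conn;
      }
    } finally {
      lock.unlockRead(stamp);
    }
    return getNew(serverName);
  }

  private Object getNew(String serverName) {
    long stamp = lock.readLock();
    try {
      // Double-check: another thread may have created it before we got here.
      Object conn = connections.get(serverName);
      if (conn != null) {
        return conn;
      }
      // Try to upgrade to a write lock; fall back to a fresh write lock if it fails.
      long ws = lock.tryConvertToWriteLock(stamp);
      if (ws == 0L) {
        lock.unlockRead(stamp);
        stamp = lock.writeLock();
        conn = connections.get(serverName); // re-check under the write lock
        if (conn != null) {
          return conn;
        }
      } else {
        stamp = ws;
      }
      conn = new Object(); // stand-in for creating a real connection
      connections.put(serverName, conn);
      return conn;
    } finally {
      lock.unlock(stamp); // releases read or write, whichever we hold
    }
  }
}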