[GitHub] [hbase] saintstack commented on a change in pull request #1016: HBASE-23656 [MERGETOOL] HBASE Support Merge region by pattern

2020-01-17 Thread GitBox
saintstack commented on a change in pull request #1016: HBASE-23656 [MERGETOOL] 
HBASE Support Merge region by pattern
URL: https://github.com/apache/hbase/pull/1016#discussion_r368174853
 
 

 ##
 File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/util/OnlineMergeTool.java
 ##
 @@ -0,0 +1,528 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
 
 Review comment:
   Fix year.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [hbase] saintstack commented on a change in pull request #1016: HBASE-23656 [MERGETOOL] HBASE Support Merge region by pattern

2020-01-17 Thread GitBox
saintstack commented on a change in pull request #1016: HBASE-23656 [MERGETOOL] 
HBASE Support Merge region by pattern
URL: https://github.com/apache/hbase/pull/1016#discussion_r368176220
 
 

 ##
 File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/util/OnlineMergeTool.java
 ##
 @@ -0,0 +1,528 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util;
+
+import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.MasterNotRunningException;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HConnectionManager;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utility that can merge any two regions in the same table: adjacent,
+ * overlapping or disjoint. It can also merge every regions, two by two.
+ */
+@InterfaceAudience.Private
+public class OnlineMergeTool extends Configured implements Tool {
+  static final Logger LOG = LoggerFactory.getLogger(OnlineMergeTool.class);
+  private final int COMPACTPAUSETIME = 180 * 1000;
+  private final int DEFAULTMERGEPAUSETIME = 120 * 1000;
+  private final String COMPACTIONATTRIBUTE = "MAJOR";
+  private final long GB = 1024L * 1024L * 1024L;
+  private final SimpleDateFormat DATE_FORMAT
+  = new SimpleDateFormat("/MM/dd HH:mm:ss");
+  private final HBaseConfiguration conf;
+  private volatile MetaUtils utils;
+  private volatile boolean isMetaTable;
+  private volatile Connection connection;
+  private volatile Admin admin;
+  // Name of table
+  private String tableName = null;
+  // Name of region 1
+  private String startRegion = null;
+  // Name of region 2
+  private String stopRegion = null;
+  // Name of maxRegionSize
+  private Long maxRegionSize = 0L;
+  // Name of maxRegionCreateTime
+  private String maxRegionCreateTime = null;
+  // Name of numMaxMergePlans
+  private String numMaxMergePlans = null;
+  // Name of targetRegionCount
+  private Long targetRegionCount = 0L;
+  /**
+   * print Execution Plan information
+   */
+  private boolean printExecutionPlan = true;
+  /**
+   * config merge pause time
+   */
+  private int mergePauseTime = 0;
+
+  /**
+   * default constructor
+   */
+  public OnlineMergeTool() throws IOException {
+this(new HBaseConfiguration());
+  }
+
+  /**
+   * @param conf The current configuration.
+   * @throws IOException If IO problem encountered
+   */
+  public OnlineMergeTool(HBaseConfiguration conf) throws IOException {
+super(conf);
+this.conf = conf;
+this.conf.setInt("hbase.client.retries.number", 3);
+this.conf.setInt("hbase.client.pause", 1000);
+this.connection = HConnectionManager.createConnection(this.conf);
+this.admin = connection.getAdmin();
+  }
+
+  /**
+   * Main program
+   *
+   * @param args The command line parameters.
+   */
+  public static void main(String[] args) {
+int status = 0;
+try {
+  status = ToolRunner.run(new OnlineMergeTool(), args);
+} catch (Exception e) {
+  LOG.error("exiting due to error", e);
+  status = -1;
+}
+System.exit(status);
+  }
+
+  @Override
+  public int run(String[] args) throws Exception {
+
+if (!doCommandLine(args)) {
+  return -1;
+}
+
+isMetaTable = Bytes.compareTo(Bytes.toBytes(tableName), 
HConstants.META_TABLE_NAME) == 0;
+// Verify file system is up.
+FileSystem fs = FileSystem.get(this.conf);  // get DFS handle
+

[GitHub] [hbase] saintstack commented on a change in pull request #1016: HBASE-23656 [MERGETOOL] HBASE Support Merge region by pattern

2020-01-17 Thread GitBox
saintstack commented on a change in pull request #1016: HBASE-23656 [MERGETOOL] 
HBASE Support Merge region by pattern
URL: https://github.com/apache/hbase/pull/1016#discussion_r368175421
 
 

 ##
 File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/util/OnlineMergeTool.java
 ##
 @@ -0,0 +1,528 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util;
+
+import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.MasterNotRunningException;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HConnectionManager;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utility that can merge any two regions in the same table: adjacent,
+ * overlapping or disjoint. It can also merge every regions, two by two.
+ */
+@InterfaceAudience.Private
+public class OnlineMergeTool extends Configured implements Tool {
+  static final Logger LOG = LoggerFactory.getLogger(OnlineMergeTool.class);
+  private final int COMPACTPAUSETIME = 180 * 1000;
+  private final int DEFAULTMERGEPAUSETIME = 120 * 1000;
+  private final String COMPACTIONATTRIBUTE = "MAJOR";
+  private final long GB = 1024L * 1024L * 1024L;
+  private final SimpleDateFormat DATE_FORMAT
+  = new SimpleDateFormat("/MM/dd HH:mm:ss");
+  private final HBaseConfiguration conf;
+  private volatile MetaUtils utils;
+  private volatile boolean isMetaTable;
+  private volatile Connection connection;
+  private volatile Admin admin;
+  // Name of table
+  private String tableName = null;
+  // Name of region 1
+  private String startRegion = null;
+  // Name of region 2
+  private String stopRegion = null;
+  // Name of maxRegionSize
+  private Long maxRegionSize = 0L;
+  // Name of maxRegionCreateTime
+  private String maxRegionCreateTime = null;
+  // Name of numMaxMergePlans
+  private String numMaxMergePlans = null;
+  // Name of targetRegionCount
+  private Long targetRegionCount = 0L;
+  /**
+   * print Execution Plan information
+   */
+  private boolean printExecutionPlan = true;
+  /**
+   * config merge pause time
+   */
+  private int mergePauseTime = 0;
+
+  /**
+   * default constructor
+   */
+  public OnlineMergeTool() throws IOException {
+this(new HBaseConfiguration());
+  }
+
+  /**
+   * @param conf The current configuration.
+   * @throws IOException If IO problem encountered
+   */
+  public OnlineMergeTool(HBaseConfiguration conf) throws IOException {
 
 Review comment:
   Yeah, aren't all merges online? Why not just MergeTool?


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [hbase] saintstack commented on a change in pull request #1016: HBASE-23656 [MERGETOOL] HBASE Support Merge region by pattern

2020-01-17 Thread GitBox
saintstack commented on a change in pull request #1016: HBASE-23656 [MERGETOOL] 
HBASE Support Merge region by pattern
URL: https://github.com/apache/hbase/pull/1016#discussion_r368175257
 
 

 ##
 File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/util/OnlineMergeTool.java
 ##
 @@ -0,0 +1,528 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util;
+
+import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.MasterNotRunningException;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HConnectionManager;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utility that can merge any two regions in the same table: adjacent,
+ * overlapping or disjoint. It can also merge every regions, two by two.
 
 Review comment:
   This is for branch-1? In branch-2, we have a multimerge procedure that can 
take many (adjacent) regions and squash them up together.
   
   We could have this in branch-1. What would you do for branch-2?


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [hbase] saintstack commented on a change in pull request #1016: HBASE-23656 [MERGETOOL] HBASE Support Merge region by pattern

2020-01-17 Thread GitBox
saintstack commented on a change in pull request #1016: HBASE-23656 [MERGETOOL] 
HBASE Support Merge region by pattern
URL: https://github.com/apache/hbase/pull/1016#discussion_r368174999
 
 

 ##
 File path: bin/hbase.cmd
 ##
 @@ -436,4 +436,5 @@ goto :eof
   echo   mapredcpDump CLASSPATH entries required by mapreduce
   echo   version Print the version
   echo   CLASSNAME   Run the class named CLASSNAME
+  echo   onlinemerge Run the merge tool
 
 Review comment:
   What is the difference between a merge and an online merge? Why does this have 
to be here and not in the shell? The shell already has a merge region facility. Add 
this as an option to the shell command?


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [hbase] saintstack commented on a change in pull request #1016: HBASE-23656 [MERGETOOL] HBASE Support Merge region by pattern

2020-01-17 Thread GitBox
saintstack commented on a change in pull request #1016: HBASE-23656 [MERGETOOL] 
HBASE Support Merge region by pattern
URL: https://github.com/apache/hbase/pull/1016#discussion_r368175342
 
 

 ##
 File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/util/OnlineMergeTool.java
 ##
 @@ -0,0 +1,528 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util;
+
+import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.MasterNotRunningException;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HConnectionManager;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utility that can merge any two regions in the same table: adjacent,
+ * overlapping or disjoint. It can also merge every regions, two by two.
+ */
+@InterfaceAudience.Private
+public class OnlineMergeTool extends Configured implements Tool {
+  static final Logger LOG = LoggerFactory.getLogger(OnlineMergeTool.class);
+  private final int COMPACTPAUSETIME = 180 * 1000;
+  private final int DEFAULTMERGEPAUSETIME = 120 * 1000;
+  private final String COMPACTIONATTRIBUTE = "MAJOR";
+  private final long GB = 1024L * 1024L * 1024L;
+  private final SimpleDateFormat DATE_FORMAT
+  = new SimpleDateFormat("/MM/dd HH:mm:ss");
 
 Review comment:
   Use ISO 8601 format?


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [hbase] saintstack commented on a change in pull request #1016: HBASE-23656 [MERGETOOL] HBASE Support Merge region by pattern

2020-01-17 Thread GitBox
saintstack commented on a change in pull request #1016: HBASE-23656 [MERGETOOL] 
HBASE Support Merge region by pattern
URL: https://github.com/apache/hbase/pull/1016#discussion_r368176192
 
 

 ##
 File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/util/OnlineMergeTool.java
 ##
 @@ -0,0 +1,528 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util;
+
+import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.MasterNotRunningException;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HConnectionManager;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utility that can merge any two regions in the same table: adjacent,
+ * overlapping or disjoint. It can also merge every regions, two by two.
+ */
+@InterfaceAudience.Private
+public class OnlineMergeTool extends Configured implements Tool {
+  static final Logger LOG = LoggerFactory.getLogger(OnlineMergeTool.class);
+  private final int COMPACTPAUSETIME = 180 * 1000;
+  private final int DEFAULTMERGEPAUSETIME = 120 * 1000;
+  private final String COMPACTIONATTRIBUTE = "MAJOR";
+  private final long GB = 1024L * 1024L * 1024L;
+  private final SimpleDateFormat DATE_FORMAT
+  = new SimpleDateFormat("/MM/dd HH:mm:ss");
+  private final HBaseConfiguration conf;
+  private volatile MetaUtils utils;
+  private volatile boolean isMetaTable;
+  private volatile Connection connection;
+  private volatile Admin admin;
+  // Name of table
+  private String tableName = null;
+  // Name of region 1
+  private String startRegion = null;
+  // Name of region 2
+  private String stopRegion = null;
+  // Name of maxRegionSize
+  private Long maxRegionSize = 0L;
+  // Name of maxRegionCreateTime
+  private String maxRegionCreateTime = null;
+  // Name of numMaxMergePlans
+  private String numMaxMergePlans = null;
+  // Name of targetRegionCount
+  private Long targetRegionCount = 0L;
+  /**
+   * print Execution Plan information
+   */
+  private boolean printExecutionPlan = true;
+  /**
+   * config merge pause time
+   */
+  private int mergePauseTime = 0;
+
+  /**
+   * default constructor
+   */
+  public OnlineMergeTool() throws IOException {
+this(new HBaseConfiguration());
+  }
+
+  /**
+   * @param conf The current configuration.
+   * @throws IOException If IO problem encountered
+   */
+  public OnlineMergeTool(HBaseConfiguration conf) throws IOException {
+super(conf);
+this.conf = conf;
+this.conf.setInt("hbase.client.retries.number", 3);
+this.conf.setInt("hbase.client.pause", 1000);
+this.connection = HConnectionManager.createConnection(this.conf);
+this.admin = connection.getAdmin();
+  }
+
+  /**
+   * Main program
+   *
+   * @param args The command line parameters.
+   */
+  public static void main(String[] args) {
+int status = 0;
+try {
+  status = ToolRunner.run(new OnlineMergeTool(), args);
+} catch (Exception e) {
+  LOG.error("exiting due to error", e);
+  status = -1;
+}
+System.exit(status);
+  }
+
+  @Override
+  public int run(String[] args) throws Exception {
+
+if (!doCommandLine(args)) {
+  return -1;
+}
+
+isMetaTable = Bytes.compareTo(Bytes.toBytes(tableName), 
HConstants.META_TABLE_NAME) == 0;
+// Verify file system is up.
+FileSystem fs = FileSystem.get(this.conf);  // get DFS handle
+