Github user karanmehta93 commented on a diff in the pull request:
https://github.com/apache/phoenix/pull/309#discussion_r203933573
--- Diff: phoenix-core/src/it/java/org/apache/phoenix/mapreduce/VerifyReplicationToolIT.java ---
@@ -0,0 +1,323 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.mapreduce;
+
+import java.io.IOException;
+import java.sql.*;
+import java.util.*;
+
+import com.google.common.collect.Maps;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.phoenix.end2end.BaseUniqueNamesOwnClusterIT;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.apache.phoenix.util.ReadOnlyProps;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+
+public class VerifyReplicationToolIT extends BaseUniqueNamesOwnClusterIT {
+ private static final Logger LOG = LoggerFactory.getLogger(VerifyReplicationToolIT.class);
+ private static final String CREATE_USER_TABLE = "CREATE TABLE IF NOT EXISTS %s ( " +
+ " TENANT_ID VARCHAR NOT NULL, USER_ID VARCHAR NOT NULL, AGE INTEGER " +
+ " CONSTRAINT pk PRIMARY KEY ( TENANT_ID, USER_ID ))";
+ private static final String UPSERT_USER = "UPSERT INTO %s VALUES (?, ?, ?)";
+ private static final String UPSERT_SELECT_USERS =
+ "UPSERT INTO %s SELECT TENANT_ID, USER_ID, %d FROM %s WHERE
TENANT_ID = ? LIMIT %d";
+ private static final Random RANDOM = new Random();
+
+ private static int tenantNum = 0;
+ private static int userNum = 0;
+ private static String sourceTableName;
+ private static String targetTableName;
+ private List<String> sourceTenants;
+ private String sourceOnlyTenant;
+ private String sourceAndTargetTenant;
+ private String targetOnlyTenant;
+
+ @BeforeClass
+ public static void createTables() throws Exception {
+ NUM_SLAVES_BASE = 2;
+ Map<String,String> props = Maps.newHashMapWithExpectedSize(1);
+ setUpTestDriver(new ReadOnlyProps(props.entrySet().iterator()));
+ Connection conn = DriverManager.getConnection(getUrl());
+ sourceTableName = generateUniqueName();
+ targetTableName = generateUniqueName();
+ // tables will have the same schema, but a different number of regions
+ conn.createStatement().execute(String.format(CREATE_USER_TABLE, sourceTableName));
+ conn.createStatement().execute(String.format(CREATE_USER_TABLE, targetTableName));
+ conn.commit();
+ }
+
+ @Before
+ public void setupTenants() throws Exception {
+ sourceTenants = new ArrayList<>(2);
+ sourceTenants.add("tenant" + tenantNum++);
+ sourceTenants.add("tenant" + tenantNum++);
+ sourceOnlyTenant = sourceTenants.get(0);
+ sourceAndTargetTenant = sourceTenants.get(1);
+ targetOnlyTenant = "tenant" + tenantNum++;
+ upsertData();
+ split(sourceTableName, 4);
+ split(targetTableName, 2);
+ // ensure scans for each table touch multiple region servers
+ ensureRegionsOnDifferentServers(sourceTableName);
--- End diff ---
The method assumes that there are only two regions, but the first table has 4 regions. If I understand correctly, you are trying to split the table evenly across the 20 rows of the 2 tenants and then assign the regions to 2 region servers. However, the implementation does not work that way.
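Not the PR's code, just a minimal sketch of what I have in mind, assuming the imports already in this diff and the `getUtility()` helper from Phoenix's `BaseTest` (the method name `spreadRegionsAcrossServers` is purely illustrative): look up all regions of the table and round-robin them across the region servers of the mini cluster instead of assuming exactly two regions.

```java
    // Sketch only: spread every region of the table across the mini cluster's
    // region servers, regardless of how many regions the split produced.
    private void spreadRegionsAcrossServers(String tableName) throws Exception {
        MiniHBaseCluster cluster = getUtility().getHBaseCluster();
        HBaseAdmin admin = getUtility().getHBaseAdmin(); // owned by the test utility, so not closed here
        List<HRegionInfo> regions = admin.getTableRegions(TableName.valueOf(tableName));
        int numServers = cluster.getRegionServerThreads().size();
        for (int i = 0; i < regions.size(); i++) {
            // send region i to region server (i % numServers)
            HRegionServer target = cluster.getRegionServer(i % numServers);
            admin.move(regions.get(i).getEncodedNameAsBytes(),
                    Bytes.toBytes(target.getServerName().getServerName()));
        }
        // in practice you would also disable the balancer and wait for the moved
        // regions to come online before kicking off the verification scans
    }
```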
---