[GitHub] [hudi] yanghua commented on a change in pull request #1774: [HUDI-703]Add unit test for HoodieSyncCommand

2020-07-23 Thread GitBox


yanghua commented on a change in pull request #1774:
URL: https://github.com/apache/hudi/pull/1774#discussion_r459376057



##
File path: hudi-spark/src/test/java/HoodieJavaGenerateApp.java
##
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hudi.DataSourceWriteOptions;
+import org.apache.hudi.HoodieDataSourceHelpers;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieTableType;
+import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.hive.MultiPartKeysValueExtractor;
+import org.apache.hudi.hive.NonPartitionedExtractor;
+import org.apache.hudi.keygen.NonpartitionedKeyGenerator;
+import org.apache.hudi.keygen.SimpleKeyGenerator;
+import org.apache.hudi.testutils.DataSourceTestUtils;
+import org.apache.hudi.testutils.HoodieTestDataGenerator;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.DataFrameWriter;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+public class HoodieJavaGenerateApp {
+  @Parameter(names = {"--table-path", "-p"}, description = "path for Hoodie 
sample table")
+  private String tablePath = "file:///tmp/hoodie/sample-table";
+
+  @Parameter(names = {"--table-name", "-n"}, description = "table name for 
Hoodie sample table")
+  private String tableName = "hoodie_test";
+
+  @Parameter(names = {"--table-type", "-t"}, description = "One of 
COPY_ON_WRITE or MERGE_ON_READ")
+  private String tableType = HoodieTableType.COPY_ON_WRITE.name();
+
+  @Parameter(names = {"--hive-sync", "-hv"}, description = "Enable syncing to 
hive")

Review comment:
   Why is it not `-hs`? Does it cause conflicts?
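
   For context on the short-option question: a minimal JCommander sketch, assuming a hypothetical second flag that also claims `-hs`. JCommander rejects duplicate declared option names with a ParameterException (the exact message may vary by version), which is the kind of conflict the comment asks about.

   import com.beust.jcommander.JCommander;
   import com.beust.jcommander.Parameter;
   import com.beust.jcommander.ParameterException;

   public class ShortNameConflictSketch {
     static class Args {
       @Parameter(names = {"--hive-sync", "-hs"}, description = "Enable syncing to hive")
       boolean hiveSync = false;

       // Hypothetical second option reusing "-hs"; only a conflict if such a flag exists.
       @Parameter(names = {"--hoodie-sample", "-hs"}, description = "Hypothetical clashing flag")
       boolean hoodieSample = false;
     }

     public static void main(String[] args) {
       try {
         JCommander.newBuilder().addObject(new Args()).build().parse("-hs");
       } catch (ParameterException e) {
         // JCommander surfaces the duplicate option name here.
         System.out.println("Conflict: " + e.getMessage());
       }
     }
   }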

##
File path: hudi-spark/src/test/java/HoodieJavaGenerateApp.java
##
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hudi.DataSourceWriteOptions;
+import org.apache.hudi.HoodieDataSourceHelpers;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieTableType;
+import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.hive.MultiPartKeysValueExtractor;
+import org.apache.hudi.hive.NonPartitionedExtractor;
+import org.apache.hudi.keygen.NonpartitionedKeyGenerator;
+import org.apache.hudi.keygen.SimpleKeyGenerator;
+import org.apache.hudi.testutils.DataSourceTestUtils;
+import org.apache.hudi.testutils.HoodieTestDataGenerator;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.DataFrameWriter;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+public class HoodieJavaGenerateApp {
+  

[GitHub] [hudi] yanghua commented on a change in pull request #1774: [HUDI-703]Add unit test for HoodieSyncCommand

2020-07-20 Thread GitBox


yanghua commented on a change in pull request #1774:
URL: https://github.com/apache/hudi/pull/1774#discussion_r457278729



##
File path: hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieSyncCommand.java
##
@@ -74,9 +74,9 @@ public String validateSync(
     }
 
     String targetLatestCommit =
-        targetTimeline.getInstants().iterator().hasNext() ? "0" : targetTimeline.lastInstant().get().getTimestamp();
+        targetTimeline.getInstants().iterator().hasNext() ? targetTimeline.lastInstant().get().getTimestamp() : "0";

Review comment:
   Good catch!
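
   For readers following along: in the pre-fix code the ternary branches were swapped, so a timeline with commits wrongly returned "0", and an empty timeline dereferenced an absent last instant. A minimal sketch of the corrected ordering, using java.util.Optional and a plain List as stand-ins for Hudi's timeline API (hypothetical names, not the real classes):

   import java.util.Arrays;
   import java.util.Collections;
   import java.util.List;
   import java.util.Optional;

   public class LatestCommitSketch {
     // Stand-in for targetTimeline.lastInstant(): empty when no commits exist.
     static Optional<String> lastInstant(List<String> instants) {
       return instants.isEmpty() ? Optional.empty() : Optional.of(instants.get(instants.size() - 1));
     }

     static String targetLatestCommit(List<String> instants) {
       // Corrected ordering: only call get() when an instant actually exists.
       return instants.iterator().hasNext() ? lastInstant(instants).get() : "0";
     }

     public static void main(String[] args) {
       System.out.println(targetLatestCommit(Collections.emptyList()));     // prints: 0
       System.out.println(targetLatestCommit(Arrays.asList("001", "002"))); // prints: 002
     }
   }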

##
File path: hudi-integ-test/src/test/java/org/apache/hudi/integ/HoodieTestHiveBase.java
##
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.integ;
+
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.common.model.HoodieTableType;
+import org.apache.hudi.common.util.collection.Pair;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Base class to run cmd and generate data in hive.
+ */
+public class HoodieTestHiveBase extends ITTestBase {
+
+  protected enum PartitionType {
+    SINGLE_KEY_PARTITIONED, MULTI_KEYS_PARTITIONED, NON_PARTITIONED,
+  }
+
+  /**
+   * A basic integration test that runs HoodieJavaApp to create a sample Hoodie data-set and performs upserts on it.
+   * Hive integration and upsert functionality is checked by running a count query in hive console. TODO: Add
+   * spark-shell test-case
+   */
+  public void generateDataByHoodieJavaApp(String hiveTableName, String tableType, PartitionType partitionType,
+      String commitType, String hoodieTableName) throws Exception {
+
+    String hdfsPath = getHdfsPath(hiveTableName);
+    String hdfsUrl = "hdfs://namenode" + hdfsPath;
+
+    Pair<String, String> stdOutErr;
+    if ("overwrite".equals(commitType)) {
+      // Drop Table if it exists
+      try {
+        dropHiveTables(hiveTableName, tableType);
+      } catch (AssertionError ex) {
+        // In travis, sometimes, the hivemetastore is not ready even though we wait for the port to be up
+        // Workaround to sleep for 5 secs and retry
+        // Set sleep time by hoodie.hiveserver.time.wait
+        Thread.sleep(getTimeWait());
+        dropHiveTables(hiveTableName, tableType);
+      }
+
+      // Ensure table does not exist
+      stdOutErr = executeHiveCommand("show tables like '" + hiveTableName + "'");
+      assertTrue(stdOutErr.getLeft().isEmpty(), "Dropped table " + hiveTableName + " exists!");
+    }
+
+    // Run Hoodie Java App
+    String cmd = String.format("%s %s --hive-sync --table-path %s  --hive-url %s  --table-type %s  --hive-table %s" +
+        " --commit-type %s  --table-name %s", HOODIE_JAVA_APP, "HoodieJavaGenerateApp", hdfsUrl, HIVE_SERVER_JDBC_URL,
+        tableType, hiveTableName, commitType, hoodieTableName);
+    if (partitionType == PartitionType.MULTI_KEYS_PARTITIONED) {
+      cmd = cmd + " --use-multi-partition-keys";
+    } else if (partitionType == PartitionType.NON_PARTITIONED) {
+      cmd = cmd + " --non-partitioned";
+    }
+    executeCommandStringInDocker(ADHOC_1_CONTAINER, cmd, true);
+
+    String snapshotTableName = getSnapshotTableName(tableType, hiveTableName);
+
+    // Ensure table does exist
+    stdOutErr = executeHiveCommand("show tables like '" + snapshotTableName + "'");
+    assertEquals(snapshotTableName, stdOutErr.getLeft(), "Table exists");
+  }
+
+  protected void dropHiveTables(String hiveTableName, String tableType) throws Exception {
+    if (tableType.equals(HoodieTableType.MERGE_ON_READ.name())) {
+      executeHiveCommand("drop table if exists " + hiveTableName + "_rt");
+      executeHiveCommand("drop table if exists " + hiveTableName + "_ro");
+    } else {
+      executeHiveCommand("drop table if exists " + hiveTableName);
+    }
+  }
+
+  protected String getHdfsPath(String hiveTableName) {

Review comment:
   `getHDFSPath` looks better?
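
   Aside from the naming nit: the sleep-and-retry workaround for the slow Hive metastore in the hunk above generalizes to a small helper. A sketch only; `retryOnce` and its signature are hypothetical and not part of this PR.

   import java.util.concurrent.Callable;

   public class RetryOnceSketch {
     // Run the action; if the first attempt fails (e.g. metastore not ready yet),
     // sleep once and retry, letting a second failure propagate to the caller.
     static <T> T retryOnce(Callable<T> action, long sleepMillis) throws Exception {
       try {
         return action.call();
       } catch (AssertionError firstFailure) {
         Thread.sleep(sleepMillis);
         return action.call();
       }
     }
   }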

##
File path: 

[GitHub] [hudi] yanghua commented on a change in pull request #1774: [HUDI-703]Add unit test for HoodieSyncCommand

2020-07-15 Thread GitBox


yanghua commented on a change in pull request #1774:
URL: https://github.com/apache/hudi/pull/1774#discussion_r454987028



##
File path: hudi-integ-test/src/test/java/org/apache/hudi/integ/HoodieTestHiveBase.java
##
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.integ;
+
+import org.apache.hudi.common.model.HoodieTableType;
+import org.apache.hudi.common.util.collection.Pair;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Base class to run cmd and generate data in hive.
+ */
+public class HoodieTestHiveBase extends ITTestBase {
+
+  protected enum PartitionType {
+    SINGLE_KEY_PARTITIONED, MULTI_KEYS_PARTITIONED, NON_PARTITIONED,
+  }
+
+  /**
+   * A basic integration test that runs HoodieJavaApp to create a sample Hoodie data-set and performs upserts on it.
+   * Hive integration and upsert functionality is checked by running a count query in hive console. TODO: Add
+   * spark-shell test-case
+   */
+  public void generateDataByHoodieJavaApp(String hiveTableName, String tableType, PartitionType partitionType,
+      String commitType, String hoodieTableName) throws Exception {
+
+    String hdfsPath = getHdfsPath(hiveTableName);
+    String hdfsUrl = "hdfs://namenode" + hdfsPath;
+
+    // Drop Table if it exists
+    try {
+      dropHiveTables(hiveTableName, tableType);
+    } catch (AssertionError ex) {
+      // In travis, sometimes, the hivemetastore is not ready even though we wait for the port to be up
+      // Workaround to sleep for 5 secs and retry
+      Thread.sleep(5000);
+      dropHiveTables(hiveTableName, tableType);
+    }
+
+    // Ensure table does not exist
+    Pair<String, String> stdOutErr = null;
+    if (commitType == "overwrite") {

Review comment:
   Replace `==` with the `equals` method
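
   A minimal illustration of why `==` is wrong here: it compares references, while `equals` compares contents, and putting the constant first is also null-safe.

   public class StringEqualitySketch {
     public static void main(String[] args) {
       String commitType = new String("overwrite"); // a distinct object, as a parsed CLI value would be
       System.out.println(commitType == "overwrite");      // false: different references
       System.out.println("overwrite".equals(commitType)); // true: equal contents, null-safe order
     }
   }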

##
File path: hudi-integ-test/src/test/java/org/apache/hudi/integ/HoodieTestHiveBase.java
##
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.integ;
+
+import org.apache.hudi.common.model.HoodieTableType;
+import org.apache.hudi.common.util.collection.Pair;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Base class to run cmd and generate data in hive.
+ */
+public class HoodieTestHiveBase extends ITTestBase {
+
+  protected enum PartitionType {
+    SINGLE_KEY_PARTITIONED, MULTI_KEYS_PARTITIONED, NON_PARTITIONED,
+  }
+
+  /**
+   * A basic integration test that runs HoodieJavaApp to create a sample Hoodie data-set and performs upserts on it.
+   * Hive integration and upsert functionality is checked by running a count query in hive console. TODO: Add
+   * spark-shell test-case
+   */
+  public void generateDataByHoodieJavaApp(String hiveTableName, String tableType, PartitionType partitionType,
+      String commitType, String hoodieTableName) throws Exception {
+
+    String hdfsPath = getHdfsPath(hiveTableName);
+    String hdfsUrl = "hdfs://namenode" + hdfsPath;
+
+    // Drop Table if it exists
+    try {
+      dropHiveTables(hiveTableName, tableType);
+    } catch (AssertionError ex) {
+      // In travis, sometimes, the hivemetastore is not ready even though we wait for the port to be up
+      // Workaround to sleep for 5 secs and retry
+      Thread.sleep(5000);
+