This is an automated email from the ASF dual-hosted git repository.

xushiyan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new abe26d4169c [HUDI-5676] Fix BigQuerySyncTool standalone mode (#7816)
abe26d4169c is described below

commit abe26d4169c04da05b99941161621876e3569e96
Author: Shiyan Xu <2701446+xushi...@users.noreply.github.com>
AuthorDate: Thu Feb 2 00:39:28 2023 -0600

    [HUDI-5676] Fix BigQuerySyncTool standalone mode (#7816)
---
 .../hudi/gcp/bigquery/BigQuerySyncConfig.java      | 38 ++++--------
 .../gcp/bigquery/TestBigQuerySyncToolArgs.java     | 70 ++++++++++++++++++++++
 packaging/hudi-gcp-bundle/pom.xml                  |  8 ++-
 3 files changed, 90 insertions(+), 26 deletions(-)

diff --git a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncConfig.java b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncConfig.java
index b46cd9a9f81..52b3d3b74e5 100644
--- a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncConfig.java
+++ b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncConfig.java
@@ -20,14 +20,13 @@
 package org.apache.hudi.gcp.bigquery;
 
 import org.apache.hudi.common.config.ConfigProperty;
+import org.apache.hudi.common.config.TypedProperties;
 import org.apache.hudi.sync.common.HoodieSyncConfig;
 
 import com.beust.jcommander.Parameter;
 import com.beust.jcommander.ParametersDelegate;
 
 import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
 import java.util.Properties;
 
 /**
@@ -101,38 +100,27 @@ public class BigQuerySyncConfig extends HoodieSyncConfig implements Serializable
     public String datasetName;
     @Parameter(names = {"--dataset-location"}, description = "Location of the target dataset in BigQuery", required = true)
     public String datasetLocation;
-    @Parameter(names = {"--table-name"}, description = "Name of the target table in BigQuery", required = true)
-    public String tableName;
     @Parameter(names = {"--source-uri"}, description = "Name of the source uri gcs path of the table", required = true)
     public String sourceUri;
     @Parameter(names = {"--source-uri-prefix"}, description = "Name of the source uri gcs path prefix of the table", required = true)
     public String sourceUriPrefix;
-    @Parameter(names = {"--base-path"}, description = "Base path of the hoodie table to sync", required = true)
-    public String basePath;
-    @Parameter(names = {"--partitioned-by"}, description = "Comma-delimited partition fields. Default to non-partitioned.")
-    public List<String> partitionFields = new ArrayList<>();
-    @Parameter(names = {"--use-file-listing-from-metadata"}, description = "Fetch file listing from Hudi's metadata")
-    public boolean useFileListingFromMetadata = false;
-    @Parameter(names = {"--assume-date-partitioning"}, description = "Assume standard yyyy/mm/dd partitioning, this"
-        + " exists to support backward compatibility. If you use hoodie 0.3.x, do not set this parameter")
-    public boolean assumeDatePartitioning = false;
 
     public boolean isHelp() {
       return hoodieSyncConfigParams.isHelp();
     }
 
-    public Properties toProps() {
-      final Properties props = hoodieSyncConfigParams.toProps();
-      props.setProperty(BIGQUERY_SYNC_PROJECT_ID.key(), projectId);
-      props.setProperty(BIGQUERY_SYNC_DATASET_NAME.key(), datasetName);
-      props.setProperty(BIGQUERY_SYNC_DATASET_LOCATION.key(), datasetLocation);
-      props.setProperty(BIGQUERY_SYNC_TABLE_NAME.key(), tableName);
-      props.setProperty(BIGQUERY_SYNC_SOURCE_URI.key(), sourceUri);
-      props.setProperty(BIGQUERY_SYNC_SOURCE_URI_PREFIX.key(), sourceUriPrefix);
-      props.setProperty(BIGQUERY_SYNC_SYNC_BASE_PATH.key(), basePath);
-      props.setProperty(BIGQUERY_SYNC_PARTITION_FIELDS.key(), String.join(",", partitionFields));
-      props.setProperty(BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA.key(), String.valueOf(useFileListingFromMetadata));
-      props.setProperty(BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING.key(), String.valueOf(assumeDatePartitioning));
+    public TypedProperties toProps() {
+      final TypedProperties props = hoodieSyncConfigParams.toProps();
+      props.setPropertyIfNonNull(BIGQUERY_SYNC_PROJECT_ID.key(), projectId);
+      props.setPropertyIfNonNull(BIGQUERY_SYNC_DATASET_NAME.key(), datasetName);
+      props.setPropertyIfNonNull(BIGQUERY_SYNC_DATASET_LOCATION.key(), datasetLocation);
+      props.setPropertyIfNonNull(BIGQUERY_SYNC_TABLE_NAME.key(), hoodieSyncConfigParams.tableName);
+      props.setPropertyIfNonNull(BIGQUERY_SYNC_SOURCE_URI.key(), sourceUri);
+      props.setPropertyIfNonNull(BIGQUERY_SYNC_SOURCE_URI_PREFIX.key(), sourceUriPrefix);
+      props.setPropertyIfNonNull(BIGQUERY_SYNC_SYNC_BASE_PATH.key(), hoodieSyncConfigParams.basePath);
+      props.setPropertyIfNonNull(BIGQUERY_SYNC_PARTITION_FIELDS.key(), String.join(",", hoodieSyncConfigParams.partitionFields));
+      props.setPropertyIfNonNull(BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA.key(), hoodieSyncConfigParams.useFileListingFromMetadata);
+      props.setPropertyIfNonNull(BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING.key(), hoodieSyncConfigParams.assumeDatePartitioning);
       return props;
     }
   }
diff --git a/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestBigQuerySyncToolArgs.java b/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestBigQuerySyncToolArgs.java
new file mode 100644
index 00000000000..898358484d9
--- /dev/null
+++ b/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestBigQuerySyncToolArgs.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.gcp.bigquery;
+
+import com.beust.jcommander.JCommander;
+import org.junit.jupiter.api.Test;
+
+import java.util.Properties;
+
+import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING;
+import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_DATASET_LOCATION;
+import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_DATASET_NAME;
+import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_PARTITION_FIELDS;
+import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_PROJECT_ID;
+import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_SOURCE_URI;
+import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_SOURCE_URI_PREFIX;
+import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_SYNC_BASE_PATH;
+import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_TABLE_NAME;
+import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+
+public class TestBigQuerySyncToolArgs {
+
+  @Test
+  public void testArgsParse() {
+    BigQuerySyncConfig.BigQuerySyncConfigParams params = new BigQuerySyncConfig.BigQuerySyncConfigParams();
+    JCommander cmd = JCommander.newBuilder().addObject(params).build();
+    String[] args = {
+        "--project-id", "hudi-bq",
+        "--dataset-name", "foobar",
+        "--dataset-location", "us-west1",
+        "--table", "foobartable",
+        "--source-uri", "gs://foobartable/year=*",
+        "--source-uri-prefix", "gs://foobartable/",
+        "--base-path", "gs://foobartable",
+        "--partitioned-by", "year,month,day",
+        "--use-file-listing-from-metadata"
+    };
+    cmd.parse(args);
+
+    final Properties props = params.toProps();
+    assertEquals("hudi-bq", props.getProperty(BIGQUERY_SYNC_PROJECT_ID.key()));
+    assertEquals("foobar", props.getProperty(BIGQUERY_SYNC_DATASET_NAME.key()));
+    assertEquals("us-west1", props.getProperty(BIGQUERY_SYNC_DATASET_LOCATION.key()));
+    assertEquals("foobartable", props.getProperty(BIGQUERY_SYNC_TABLE_NAME.key()));
+    assertEquals("gs://foobartable/year=*", props.getProperty(BIGQUERY_SYNC_SOURCE_URI.key()));
+    assertEquals("gs://foobartable/", props.getProperty(BIGQUERY_SYNC_SOURCE_URI_PREFIX.key()));
+    assertEquals("gs://foobartable", props.getProperty(BIGQUERY_SYNC_SYNC_BASE_PATH.key()));
+    assertEquals("year,month,day", props.getProperty(BIGQUERY_SYNC_PARTITION_FIELDS.key()));
+    assertEquals("true", props.getProperty(BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA.key()));
+    assertFalse(props.containsKey(BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING.key()));
+  }
+}
diff --git a/packaging/hudi-gcp-bundle/pom.xml b/packaging/hudi-gcp-bundle/pom.xml
index 85d3093e89a..fe9b6b55527 100644
--- a/packaging/hudi-gcp-bundle/pom.xml
+++ b/packaging/hudi-gcp-bundle/pom.xml
@@ -95,9 +95,9 @@
                   <include>org.apache.hudi:hudi-common</include>
                   <include>org.apache.hudi:hudi-hadoop-mr</include>
                   <include>org.apache.hudi:hudi-sync-common</include>
+                  <include>org.apache.hudi:hudi-hive-sync</include>
                   <include>org.apache.hudi:hudi-gcp</include>
                   <include>org.apache.parquet:parquet-avro</include>
-
                   <include>com.google.cloud:google-cloud-bigquery</include>
                   <include>com.beust:jcommander</include>
                   <include>commons-io:commons-io</include>
@@ -164,6 +164,12 @@
       <version>${project.version}</version>
     </dependency>
 
+    <dependency>
+      <groupId>org.apache.hudi</groupId>
+      <artifactId>hudi-hive-sync</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
     <dependency>
       <groupId>org.apache.hudi</groupId>
       <artifactId>hudi-gcp</artifactId>

Reply via email to