This is an automated email from the ASF dual-hosted git repository.

xushiyan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push: new abe26d4169c [HUDI-5676] Fix BigQuerySyncTool standalone mode (#7816) abe26d4169c is described below commit abe26d4169c04da05b99941161621876e3569e96 Author: Shiyan Xu <2701446+xushi...@users.noreply.github.com> AuthorDate: Thu Feb 2 00:39:28 2023 -0600 [HUDI-5676] Fix BigQuerySyncTool standalone mode (#7816) --- .../hudi/gcp/bigquery/BigQuerySyncConfig.java | 38 ++++-------- .../gcp/bigquery/TestBigQuerySyncToolArgs.java | 70 ++++++++++++++++++++++ packaging/hudi-gcp-bundle/pom.xml | 8 ++- 3 files changed, 90 insertions(+), 26 deletions(-) diff --git a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncConfig.java b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncConfig.java index b46cd9a9f81..52b3d3b74e5 100644 --- a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncConfig.java +++ b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncConfig.java @@ -20,14 +20,13 @@ package org.apache.hudi.gcp.bigquery; import org.apache.hudi.common.config.ConfigProperty; +import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.sync.common.HoodieSyncConfig; import com.beust.jcommander.Parameter; import com.beust.jcommander.ParametersDelegate; import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; import java.util.Properties; /** @@ -101,38 +100,27 @@ public class BigQuerySyncConfig extends HoodieSyncConfig implements Serializable public String datasetName; @Parameter(names = {"--dataset-location"}, description = "Location of the target dataset in BigQuery", required = true) public String datasetLocation; - @Parameter(names = {"--table-name"}, description = "Name of the target table in BigQuery", required = true) - public String tableName; @Parameter(names = {"--source-uri"}, description = "Name of the source uri gcs path of the table", required = true) public String sourceUri; @Parameter(names = 
{"--source-uri-prefix"}, description = "Name of the source uri gcs path prefix of the table", required = true) public String sourceUriPrefix; - @Parameter(names = {"--base-path"}, description = "Base path of the hoodie table to sync", required = true) - public String basePath; - @Parameter(names = {"--partitioned-by"}, description = "Comma-delimited partition fields. Default to non-partitioned.") - public List<String> partitionFields = new ArrayList<>(); - @Parameter(names = {"--use-file-listing-from-metadata"}, description = "Fetch file listing from Hudi's metadata") - public boolean useFileListingFromMetadata = false; - @Parameter(names = {"--assume-date-partitioning"}, description = "Assume standard yyyy/mm/dd partitioning, this" - + " exists to support backward compatibility. If you use hoodie 0.3.x, do not set this parameter") - public boolean assumeDatePartitioning = false; public boolean isHelp() { return hoodieSyncConfigParams.isHelp(); } - public Properties toProps() { - final Properties props = hoodieSyncConfigParams.toProps(); - props.setProperty(BIGQUERY_SYNC_PROJECT_ID.key(), projectId); - props.setProperty(BIGQUERY_SYNC_DATASET_NAME.key(), datasetName); - props.setProperty(BIGQUERY_SYNC_DATASET_LOCATION.key(), datasetLocation); - props.setProperty(BIGQUERY_SYNC_TABLE_NAME.key(), tableName); - props.setProperty(BIGQUERY_SYNC_SOURCE_URI.key(), sourceUri); - props.setProperty(BIGQUERY_SYNC_SOURCE_URI_PREFIX.key(), sourceUriPrefix); - props.setProperty(BIGQUERY_SYNC_SYNC_BASE_PATH.key(), basePath); - props.setProperty(BIGQUERY_SYNC_PARTITION_FIELDS.key(), String.join(",", partitionFields)); - props.setProperty(BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA.key(), String.valueOf(useFileListingFromMetadata)); - props.setProperty(BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING.key(), String.valueOf(assumeDatePartitioning)); + public TypedProperties toProps() { + final TypedProperties props = hoodieSyncConfigParams.toProps(); + 
props.setPropertyIfNonNull(BIGQUERY_SYNC_PROJECT_ID.key(), projectId); + props.setPropertyIfNonNull(BIGQUERY_SYNC_DATASET_NAME.key(), datasetName); + props.setPropertyIfNonNull(BIGQUERY_SYNC_DATASET_LOCATION.key(), datasetLocation); + props.setPropertyIfNonNull(BIGQUERY_SYNC_TABLE_NAME.key(), hoodieSyncConfigParams.tableName); + props.setPropertyIfNonNull(BIGQUERY_SYNC_SOURCE_URI.key(), sourceUri); + props.setPropertyIfNonNull(BIGQUERY_SYNC_SOURCE_URI_PREFIX.key(), sourceUriPrefix); + props.setPropertyIfNonNull(BIGQUERY_SYNC_SYNC_BASE_PATH.key(), hoodieSyncConfigParams.basePath); + props.setPropertyIfNonNull(BIGQUERY_SYNC_PARTITION_FIELDS.key(), String.join(",", hoodieSyncConfigParams.partitionFields)); + props.setPropertyIfNonNull(BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA.key(), hoodieSyncConfigParams.useFileListingFromMetadata); + props.setPropertyIfNonNull(BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING.key(), hoodieSyncConfigParams.assumeDatePartitioning); return props; } } diff --git a/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestBigQuerySyncToolArgs.java b/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestBigQuerySyncToolArgs.java new file mode 100644 index 00000000000..898358484d9 --- /dev/null +++ b/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestBigQuerySyncToolArgs.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.gcp.bigquery; + +import com.beust.jcommander.JCommander; +import org.junit.jupiter.api.Test; + +import java.util.Properties; + +import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING; +import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_DATASET_LOCATION; +import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_DATASET_NAME; +import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_PARTITION_FIELDS; +import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_PROJECT_ID; +import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_SOURCE_URI; +import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_SOURCE_URI_PREFIX; +import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_SYNC_BASE_PATH; +import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_TABLE_NAME; +import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; + +public class TestBigQuerySyncToolArgs { + + @Test + public void testArgsParse() { + BigQuerySyncConfig.BigQuerySyncConfigParams params = new BigQuerySyncConfig.BigQuerySyncConfigParams(); + JCommander cmd = JCommander.newBuilder().addObject(params).build(); + String[] args = { + "--project-id", "hudi-bq", + "--dataset-name", "foobar", + 
"--dataset-location", "us-west1", + "--table", "foobartable", + "--source-uri", "gs://foobartable/year=*", + "--source-uri-prefix", "gs://foobartable/", + "--base-path", "gs://foobartable", + "--partitioned-by", "year,month,day", + "--use-file-listing-from-metadata" + }; + cmd.parse(args); + + final Properties props = params.toProps(); + assertEquals("hudi-bq", props.getProperty(BIGQUERY_SYNC_PROJECT_ID.key())); + assertEquals("foobar", props.getProperty(BIGQUERY_SYNC_DATASET_NAME.key())); + assertEquals("us-west1", props.getProperty(BIGQUERY_SYNC_DATASET_LOCATION.key())); + assertEquals("foobartable", props.getProperty(BIGQUERY_SYNC_TABLE_NAME.key())); + assertEquals("gs://foobartable/year=*", props.getProperty(BIGQUERY_SYNC_SOURCE_URI.key())); + assertEquals("gs://foobartable/", props.getProperty(BIGQUERY_SYNC_SOURCE_URI_PREFIX.key())); + assertEquals("gs://foobartable", props.getProperty(BIGQUERY_SYNC_SYNC_BASE_PATH.key())); + assertEquals("year,month,day", props.getProperty(BIGQUERY_SYNC_PARTITION_FIELDS.key())); + assertEquals("true", props.getProperty(BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA.key())); + assertFalse(props.containsKey(BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING.key())); + } +} diff --git a/packaging/hudi-gcp-bundle/pom.xml b/packaging/hudi-gcp-bundle/pom.xml index 85d3093e89a..fe9b6b55527 100644 --- a/packaging/hudi-gcp-bundle/pom.xml +++ b/packaging/hudi-gcp-bundle/pom.xml @@ -95,9 +95,9 @@ <include>org.apache.hudi:hudi-common</include> <include>org.apache.hudi:hudi-hadoop-mr</include> <include>org.apache.hudi:hudi-sync-common</include> + <include>org.apache.hudi:hudi-hive-sync</include> <include>org.apache.hudi:hudi-gcp</include> <include>org.apache.parquet:parquet-avro</include> - <include>com.google.cloud:google-cloud-bigquery</include> <include>com.beust:jcommander</include> <include>commons-io:commons-io</include> @@ -164,6 +164,12 @@ <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.apache.hudi</groupId> + 
<artifactId>hudi-hive-sync</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> <groupId>org.apache.hudi</groupId> <artifactId>hudi-gcp</artifactId>