This is an automated email from the ASF dual-hosted git repository. vinoth pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push: new 32ade368d89 [MINOR] Clean default Hadoop configuration values in tests (#10495) 32ade368d89 is described below commit 32ade368d899ede5c8e7854863945864604b5692 Author: Lin Liu <141371752+linliu-c...@users.noreply.github.com> AuthorDate: Tue Jan 16 14:24:23 2024 -0800 [MINOR] Clean default Hadoop configuration values in tests (#10495) * [MINOR] Clean default Hadoop configurations for SparkContext These default Hadoop configurations are not used in Hudi tests. * Consolidating the code into a helper class --------- Co-authored-by: vinoth chandar <vin...@apache.org> --- .../org/apache/hudi/testutils/HoodieClientTestUtils.java | 14 ++++++++++++++ .../hudi/testutils/HoodieSparkClientTestHarness.java | 9 ++++++--- .../hudi/testutils/SparkClientFunctionalTestHarness.java | 1 + 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java index 991c615c35d..55619a2a24b 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java @@ -53,6 +53,7 @@ import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.spark.SparkConf; +import org.apache.spark.SparkContext; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; @@ -61,6 +62,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.lang.reflect.Field; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -125,6 +127,18 @@ public class HoodieClientTestUtils { return SparkRDDReadClient.addHoodieSupport(sparkConf); } + public static void overrideSparkHadoopConfiguration(SparkContext sparkContext) { + try { + // Clean the default Hadoop configurations since in our Hudi tests they are not used. + Field hadoopConfigurationField = sparkContext.getClass().getDeclaredField("_hadoopConfiguration"); + hadoopConfigurationField.setAccessible(true); + Configuration testHadoopConfig = new Configuration(false); + hadoopConfigurationField.set(sparkContext, testHadoopConfig); + } catch (NoSuchFieldException | IllegalAccessException e) { + LOG.warn(e.getMessage()); + } + } + private static HashMap<String, String> getLatestFileIDsToFullPath(String basePath, HoodieTimeline commitTimeline, List<HoodieInstant> commitsToReturn) throws IOException { HashMap<String, String> fileIdToFullPath = new HashMap<>(); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java index 2a83baa018c..59cfcb4bb6d 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java @@ -69,6 +69,8 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; +import org.apache.spark.SparkConf; +import org.apache.spark.SparkContext; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SQLContext; @@ -191,11 +193,12 @@ public abstract class HoodieSparkClientTestHarness extends HoodieWriterClientTes } // Initialize a local spark env - jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest(appName + "#" + testMethodName)); + SparkConf sc = HoodieClientTestUtils.getSparkConfForTest(appName + "#" + testMethodName); + SparkContext sparkContext = new SparkContext(sc); + HoodieClientTestUtils.overrideSparkHadoopConfiguration(sparkContext); + jsc = new JavaSparkContext(sparkContext); jsc.setLogLevel("ERROR"); - hadoopConf = jsc.hadoopConfiguration(); - sparkSession = SparkSession.builder() .withExtensions(JFunction.toScala(sparkSessionExtensions -> { sparkSessionExtensionsInjector.ifPresent(injector -> injector.accept(sparkSessionExtensions)); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java index 511613d9044..14d325bfdac 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java @@ -201,6 +201,7 @@ public class SparkClientFunctionalTestHarness implements SparkProvider, HoodieMe SparkRDDReadClient.addHoodieSupport(sparkConf); spark = SparkSession.builder().config(sparkConf).getOrCreate(); sqlContext = spark.sqlContext(); + HoodieClientTestUtils.overrideSparkHadoopConfiguration(spark.sparkContext()); jsc = new JavaSparkContext(spark.sparkContext()); context = new HoodieSparkEngineContext(jsc); timelineService = HoodieClientTestUtils.initTimelineService(