Hi, You might want to subscribe the mailing list, so that the replies actually make it to the list automatically.
This seems like a class version mismatch between jars, since you. are getting NoSuchMethodError (and not NoClassDefFound..) We don't bundle either hadoop or aws or spark jars. There is no special config here, except So, are you able to access s3 via regular spark.read.parquet(..) successfully? - Vinoth On Thu, Nov 14, 2019 at 8:43 AM Sudharshan Rajendhiran < [email protected]> wrote: > Hello, can anyone point me to the right dependencies to configure Hudi > with to write to S3 > > I start the Spark shell with aws sdk and hadoop-aws libs as per the S3 > guide > with hudi.conf consists of spark Kryo serialiser and S3 keys. > > spark-shell --jars > $HUDI_SPARK_BUNDLE_PATH,/tmp/hudi/hadoop-aws-2.7.3.jar,/tmp/hudi/aws-java-sdk-1.7.4.jar > --properties-file ~/hudi.conf > > import org.apache.hudi.QuickstartUtils._ > import scala.collection.JavaConversions._ > import org.apache.spark.sql.SaveMode._ > import org.apache.hudi.DataSourceReadOptions._ > import org.apache.hudi.DataSourceWriteOptions._ > import org.apache.hudi.config.HoodieWriteConfig._ > > val tableName = "hudi_cow_table" > val s3path = "s3a://bucket-name/hudi-test/" > val dataGen = new DataGenerator > > val inserts = convertToStringList(dataGen.generateInserts(10)) > val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2)) > df.write.format("org.apache.hudi"). > options(getQuickstartWriteConfigs). > option(PRECOMBINE_FIELD_OPT_KEY, "ts"). > option(RECORDKEY_FIELD_OPT_KEY, "uuid"). > option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath"). > option(TABLE_NAME, tableName). > mode(Overwrite). > save(s3path); > > > results in Error > > java.lang.NoSuchMethodError: > com.amazonaws.services.s3.transfer.TransferManager.<init>(Lcom/amazonaws/services/s3/AmazonS3;Ljava/util/concurrent/ThreadPoolExecutor;)V > at > org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:287) > at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2669) > at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:94) > at > org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2703) > at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2685) > at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:373) > at org.apache.hadoop.fs.Path.getFileSystem(Path.java:295) > at > org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:98) > at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:91) > at > org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) > at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) > at > org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80) > at > org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80) > at > org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676) > at > org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676) > at > org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78) > at > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73) > at > org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:676) > at > org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:285) > at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:271) > at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:229) > ... 68 elided > >
