tommss opened a new issue, #5796: URL: https://github.com/apache/hudi/issues/5796
I am doing a PoC on multi-writer support for a Hudi table (version 0.11.0). I am following the concurrency control documentation (https://hudi.apache.org/docs/concurrency_control/) and have set up ZooKeeper as the lock provider. The table is copy-on-write (COW), and I keep getting this error:

```
22/06/08 14:51:51 ERROR Worker: `abfss://<xxxx>/.hoodie/hoodie.properties': Input/output error: Parallel access to the create path detected. Failing request to honor single writer semantics
org.apache.hadoop.fs.PathIOException: `abfss://<xxxx>/.hoodie/hoodie.properties': Input/output error: Parallel access to the create path detected. Failing request to honor single writer semantics
  at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.checkException(AzureBlobFileSystem.java:1359)
  at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.create(AzureBlobFileSystem.java:294)
  at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
  at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
  at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1064)
  at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1052)
  at org.apache.hudi.common.table.HoodieTableConfig.create(HoodieTableConfig.java:414)
  at org.apache.hudi.common.table.HoodieTableMetaClient.initTableAndGetMetaClient(HoodieTableMetaClient.java:441)
  at org.apache.hudi.common.table.HoodieTableMetaClient$PropertyBuilder.initTable(HoodieTableMetaClient.java:1044)
  at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:164)
  at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:163)
  at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:47)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:80)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:78)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:89)
  at org.apache.spark.sql.execution.QueryExecution$$anonfun$$nestedInanonfun$eagerlyExecuteCommands$1$1.$anonfun$applyOrElse$1(QueryExecution.scala:160)
  at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$8(SQLExecution.scala:213)
  at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:360)
  at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$1(SQLExecution.scala:160)
  at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:968)
  at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:115)
  at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:310)
  at org.apache.spark.sql.execution.QueryExecution$$anonfun$$nestedInanonfun$eagerlyExecuteCommands$1$1.applyOrElse(QueryExecution.scala:160)
  at org.apache.spark.sql.execution.QueryExecution$$anonfun$$nestedInanonfun$eagerlyExecuteCommands$1$1.applyOrElse(QueryExecution.scala:156)
  at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:575)
  at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:167)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:575)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
  at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:268)
  at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:264)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
  at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:551)
  at org.apache.spark.sql.execution.QueryExecution.$anonfun$eagerlyExecuteCommands$1(QueryExecution.scala:156)
  at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:324)
  at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:156)
  at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:141)
  at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:132)
  at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:186)
  at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:959)
  at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:427)
  at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:338)
  at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:250)
  <xxx>
  at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
  at java.util.concurrent.FutureTask.run(FutureTask.java:266)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  at java.lang.Thread.run(Thread.java:748)
Caused by: Parallel access to the create path detected. Failing request to honor single writer semantics
  at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.conditionalCreateOverwriteFile(AzureBlobFileSystemStore.java:609)
  at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.createFile(AzureBlobFileSystemStore.java:516)
  at shaded.databricks.azurebfs.org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.create(AzureBlobFileSystem.java:289)
  ... 49 more
```
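For context, here is a minimal sketch of the writer options I am using, following the configs on that docs page. The table name, key fields, ZooKeeper host/port, and lock key below are placeholders rather than my actual values, and the ABFS path keeps the redaction from the log above:

```scala
import org.apache.spark.sql.{SaveMode, SparkSession}

val spark = SparkSession.builder().appName("hudi-multi-writer-poc").getOrCreate()
val df = spark.read.format("parquet").load("/tmp/source") // placeholder input

// Sketch: concurrent COW upsert with optimistic concurrency control and the
// ZooKeeper lock provider, per https://hudi.apache.org/docs/concurrency_control/.
// Table name, key fields, ZooKeeper host/port, and lock key are placeholders.
df.write.format("hudi").
  option("hoodie.table.name", "poc_table").
  option("hoodie.datasource.write.table.type", "COPY_ON_WRITE").
  option("hoodie.datasource.write.operation", "upsert").
  option("hoodie.datasource.write.recordkey.field", "id").
  option("hoodie.datasource.write.precombine.field", "ts").
  // Multi-writer settings from the concurrency control docs
  option("hoodie.write.concurrency.mode", "optimistic_concurrency_control").
  option("hoodie.cleaner.policy.failed.writes", "LAZY").
  option("hoodie.write.lock.provider",
         "org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider").
  option("hoodie.write.lock.zookeeper.url", "zk-host").
  option("hoodie.write.lock.zookeeper.port", "2181").
  option("hoodie.write.lock.zookeeper.lock_key", "poc_table").
  option("hoodie.write.lock.zookeeper.base_path", "/hudi/locks").
  mode(SaveMode.Append).
  save("abfss://<xxxx>") // redacted table base path
```

From the stack trace, the failure appears to be raised while `HoodieTableMetaClient.initTableAndGetMetaClient` is creating `.hoodie/hoodie.properties`, i.e., while two writers are both initializing the table at the same base path.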