gsudhanshu opened a new issue, #10432: URL: https://github.com/apache/hudi/issues/10432
My setup: My Django application on the app server uses pyspark 3.4.2 to connect to the Spark-Hudi bundle (3.4.2:2_12:0.14.0) on the db server. Spark config: ``` spark = SparkSession.builder \ .appName("dataHudi") \ .master('spark://db_server:7077') \ .config('spark.driver.bindAddress', '0.0.0.0') \ .config('spark.driver.host', 'app_server') \ .config('spark.driver.port', '37077') \ .config('spark.driver.blockManager.port', '37078') \ .config('spark.executor.host', 'db_server') \ .config("spark.executor.port", "37079") \ .config('spark.fileserver.host', 'db_server') \ .config("spark.fileserver.port", "37080") \ .config('spark.replClassServer.host', 'db_server') \ .config("spark.replClassServer.port", "37081") \ .config('spark.broadcast.host', 'db_server') \ .config("spark.broadcast.port", "37082") \ .config('spark.driver.memory', '7g') \ .config('spark.executor.memory', '4g') \ .config('spark.jars.packages', 'org.apache.hudi:hudi-spark3.4-bundle_2.12:0.14.0') \ .config('spark.serializer', 'org.apache.spark.serializer.KryoSerializer') \ .config('spark.sql.catalog.spark_catalog', 'org.apache.spark.sql.hudi.catalog.HoodieCatalog') \ .config('spark.sql.extensions', 'org.apache.spark.sql.hudi.HoodieSparkSessionExtension') \ .getOrCreate() ``` While writing data from pyspark pandas to Hudi, I am getting the following errors. Event error logs: ``` org.apache.hudi.exception.HoodieRemoteException: Failed to create marker file /d13b0ab8-8d0d-4743-9f31-b3df2982965f-0_0-2-0_20231229115643749.parquet.marker.CREATE Connect to 10.0.0.xxx:46131 [/10.0.0.xxx] failed: Connection timed out (Connection timed out) at org.apache.hudi.table.marker.TimelineServerBasedWriteMarkers.executeCreateMarkerRequest(TimelineServerBasedWriteMarkers.java:186) at org.apache.hudi.table.marker.TimelineServerBasedWriteMarkers.create(TimelineServerBasedWriteMarkers.java:141) at org.apache.hudi.table.marker.WriteMarkers.create(WriteMarkers.java:67) at 
org.apache.hudi.io.storage.row.HoodieRowCreateHandle.createMarkerFile(HoodieRowCreateHandle.java:281) at org.apache.hudi.io.storage.row.HoodieRowCreateHandle.<init>(HoodieRowCreateHandle.java:144) at org.apache.hudi.table.action.commit.BulkInsertDataInternalWriterHelper.createHandle(BulkInsertDataInternalWriterHelper.java:187) at org.apache.hudi.table.action.commit.BulkInsertDataInternalWriterHelper.getRowCreateHandle(BulkInsertDataInternalWriterHelper.java:173) at org.apache.hudi.table.action.commit.BulkInsertDataInternalWriterHelper.write(BulkInsertDataInternalWriterHelper.java:121) at org.apache.hudi.spark3.internal.HoodieBulkInsertDataInternalWriter.write(HoodieBulkInsertDataInternalWriter.java:62) at org.apache.hudi.spark3.internal.HoodieBulkInsertDataInternalWriter.write(HoodieBulkInsertDataInternalWriter.java:38) at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.write(WriteToDataSourceV2Exec.scala:516) at org.apache.spark.sql.execution.datasources.v2.WritingSparkTask.$anonfun$run$1(WriteToDataSourceV2Exec.scala:471) at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1563) at org.apache.spark.sql.execution.datasources.v2.WritingSparkTask.run(WriteToDataSourceV2Exec.scala:509) at org.apache.spark.sql.execution.datasources.v2.WritingSparkTask.run$(WriteToDataSourceV2Exec.scala:448) at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.run(WriteToDataSourceV2Exec.scala:514) at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.$anonfun$writeWithV2$2(WriteToDataSourceV2Exec.scala:411) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92) at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161) at org.apache.spark.scheduler.Task.run(Task.scala:139) at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529) at 
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557) at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) at java.base/java.lang.Thread.run(Thread.java:829) Caused by: org.apache.hudi.org.apache.http.conn.HttpHostConnectException: Connect to 10.0.0.164:46131 [/10.0.0.164] failed: Connection timed out (Connection timed out) at org.apache.hudi.org.apache.http.impl.conn.DefaultHttpClientConnectionOperator.connect(DefaultHttpClientConnectionOperator.java:151) at org.apache.hudi.org.apache.http.impl.conn.PoolingHttpClientConnectionManager.connect(PoolingHttpClientConnectionManager.java:353) at org.apache.hudi.org.apache.http.impl.execchain.MainClientExec.establishRoute(MainClientExec.java:380) at org.apache.hudi.org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:236) at org.apache.hudi.org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:184) at org.apache.hudi.org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:88) at org.apache.hudi.org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:110) at org.apache.hudi.org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:184) at org.apache.hudi.org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82) at org.apache.hudi.org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:107) at org.apache.hudi.org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55) at org.apache.hudi.org.apache.http.client.fluent.Request.execute(Request.java:151) at org.apache.hudi.table.marker.TimelineServerBasedWriteMarkers.executeRequestToTimelineServer(TimelineServerBasedWriteMarkers.java:232) at org.apache.hudi.table.marker.TimelineServerBasedWriteMarkers.executeCreateMarkerRequest(TimelineServerBasedWriteMarkers.java:182) ``` Is 
there a workaround? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org