[ https://issues.apache.org/jira/browse/HUDI-3448?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Harshal Patil reassigned HUDI-3448: ----------------------------------- Assignee: sivabalan narayanan (was: Harshal Patil) > DeltaStreamer process does not exit for HoodieIncrSource when there is > exception thrown and spark context is stopped > --------------------------------------------------------------------------------------------------------------------- > > Key: HUDI-3448 > URL: https://issues.apache.org/jira/browse/HUDI-3448 > Project: Apache Hudi > Issue Type: Bug > Reporter: Harshal Patil > Assignee: sivabalan narayanan > Priority: Minor > > S3EventsHoodieIncrSource fails while processing from the metadata S3 source table > if the metadata table is empty (no data files yet). > It fails to get the schema and an exception is thrown, but somehow the process does not > terminate, even though the Spark context is stopped. > Desired outcome: the process should terminate so > that it can be retried. > > 22/02/16 12:10:36 INFO BaseHoodieTableFileIndex: Refresh table > s3_metadata_table, spent: 9088 ms > 22/02/16 12:10:36 WARN TableSchemaResolver: Failed to read operation field > from avro schema > java.lang.IllegalArgumentException: Could not find any data file written for > commit, so could not get schema for table > s3a://s3-datasource-metadata-c8f381dd-e53a-5faa-81fb-fc7a72f6aac2/harshal-feb-testing/s3_metadata_database/s3_metadata_table > at > org.apache.hudi.common.table.TableSchemaResolver.getTableParquetSchemaFromDataFile(TableSchemaResolver.java:88) > at > org.apache.hudi.common.table.TableSchemaResolver.getTableAvroSchemaFromDataFile(TableSchemaResolver.java:119) > at > org.apache.hudi.common.table.TableSchemaResolver.hasOperationField(TableSchemaResolver.java:480) > at > org.apache.hudi.common.table.TableSchemaResolver.<init>(TableSchemaResolver.java:65) > at > org.apache.hudi.SparkHoodieTableFileIndex.$anonfun$schema$1(SparkHoodieTableFileIndex.scala:77) > at scala.Option.getOrElse(Option.scala:189) > at > 
org.apache.hudi.SparkHoodieTableFileIndex.schema$lzycompute(SparkHoodieTableFileIndex.scala:76) > at > org.apache.hudi.SparkHoodieTableFileIndex.schema(SparkHoodieTableFileIndex.scala:76) > at > org.apache.hudi.SparkHoodieTableFileIndex._partitionSchemaFromProperties$lzycompute(SparkHoodieTableFileIndex.scala:89) > at > org.apache.hudi.SparkHoodieTableFileIndex._partitionSchemaFromProperties(SparkHoodieTableFileIndex.scala:86) > at > org.apache.hudi.SparkHoodieTableFileIndex.partitionSchema(SparkHoodieTableFileIndex.scala:129) > at > org.apache.hudi.DefaultSource.getBaseFileOnlyView(DefaultSource.scala:205) > at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:121) > at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:69) > at > org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:350) > at > org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:274) > at > org.apache.spark.sql.DataFrameReader.$anonfun$load$3(DataFrameReader.scala:245) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:245) > at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:188) > at > org.apache.hudi.utilities.sources.S3EventsHoodieIncrSource.fetchNextBatch(S3EventsHoodieIncrSource.java:126) > at > org.apache.hudi.utilities.sources.RowSource.fetchNewData(RowSource.java:43) > at org.apache.hudi.utilities.sources.Source.fetchNext(Source.java:76) > at > org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter.fetchNewDataInAvroFormat(SourceFormatAdapter.java:69) > at > org.apache.hudi.utilities.deltastreamer.DeltaSync.fetchFromSource(DeltaSync.java:457) > at > org.apache.hudi.utilities.deltastreamer.DeltaSync.readFromSource(DeltaSync.java:390) > at > org.apache.hudi.utilities.deltastreamer.DeltaSync.syncOnce(DeltaSync.java:298) > at > 
org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer$DeltaSyncService.lambda$startService$0(HoodieDeltaStreamer.java:652) > at > java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:750) > 22/02/16 12:10:36 ERROR HoodieDeltaStreamer: Shutting down delta-sync due to > exception > org.apache.hudi.exception.HoodieException: Failed to read schema from commit > metadata > at > org.apache.hudi.common.table.TableSchemaResolver.getTableSchemaFromCommitMetadata(TableSchemaResolver.java:244) > at > org.apache.hudi.common.table.TableSchemaResolver.getTableSchemaFromCommitMetadata(TableSchemaResolver.java:215) > at > org.apache.hudi.common.table.TableSchemaResolver.getTableAvroSchema(TableSchemaResolver.java:140) > at > org.apache.hudi.common.table.TableSchemaResolver.getTableAvroSchema(TableSchemaResolver.java:129) > at > org.apache.hudi.SparkHoodieTableFileIndex.$anonfun$schema$1(SparkHoodieTableFileIndex.scala:78) > at scala.Option.getOrElse(Option.scala:189) > at > org.apache.hudi.SparkHoodieTableFileIndex.schema$lzycompute(SparkHoodieTableFileIndex.scala:76) > at > org.apache.hudi.SparkHoodieTableFileIndex.schema(SparkHoodieTableFileIndex.scala:76) > at > org.apache.hudi.SparkHoodieTableFileIndex._partitionSchemaFromProperties$lzycompute(SparkHoodieTableFileIndex.scala:89) > at > org.apache.hudi.SparkHoodieTableFileIndex._partitionSchemaFromProperties(SparkHoodieTableFileIndex.scala:86) > at > org.apache.hudi.SparkHoodieTableFileIndex.partitionSchema(SparkHoodieTableFileIndex.scala:129) > at > org.apache.hudi.DefaultSource.getBaseFileOnlyView(DefaultSource.scala:205) > at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:121) > at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:69) > at > 
org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:350) > at > org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:274) > at > org.apache.spark.sql.DataFrameReader.$anonfun$load$3(DataFrameReader.scala:245) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:245) > at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:188) > at > org.apache.hudi.utilities.sources.S3EventsHoodieIncrSource.fetchNextBatch(S3EventsHoodieIncrSource.java:126) > at > org.apache.hudi.utilities.sources.RowSource.fetchNewData(RowSource.java:43) > at org.apache.hudi.utilities.sources.Source.fetchNext(Source.java:76) > at > org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter.fetchNewDataInAvroFormat(SourceFormatAdapter.java:69) > at > org.apache.hudi.utilities.deltastreamer.DeltaSync.fetchFromSource(DeltaSync.java:457) > at > org.apache.hudi.utilities.deltastreamer.DeltaSync.readFromSource(DeltaSync.java:390) > at > org.apache.hudi.utilities.deltastreamer.DeltaSync.syncOnce(DeltaSync.java:298) > at > org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer$DeltaSyncService.lambda$startService$0(HoodieDeltaStreamer.java:652) > at > java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:750) > Caused by: org.apache.avro.AvroRuntimeException: Not a record: "null" > at org.apache.avro.Schema.getFields(Schema.java:279) > at > org.apache.hudi.avro.HoodieAvroUtils.addMetadataFields(HoodieAvroUtils.java:207) > at > org.apache.hudi.common.table.TableSchemaResolver.getTableSchemaFromCommitMetadata(TableSchemaResolver.java:240) > ... 31 more > 22/02/16 12:10:36 INFO HoodieDeltaStreamer: Delta Sync shutdown. 
Error ?true > 22/02/16 12:10:36 ERROR HoodieAsyncService: Service shutdown with error > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: Failed to read schema from commit > metadata > at > java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357) > at java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1908) > at > org.apache.hudi.async.HoodieAsyncService.waitForShutdown(HoodieAsyncService.java:89) > at > org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.lambda$sync$1(HoodieDeltaStreamer.java:182) > at org.apache.hudi.common.util.Option.ifPresent(Option.java:96) > at > org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.sync(HoodieDeltaStreamer.java:179) > at > org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.main(HoodieDeltaStreamer.java:526) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) > at > org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:955) > at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180) > at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203) > at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90) > at > org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1043) > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1052) > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > Caused by: org.apache.hudi.exception.HoodieException: Failed to read schema > from commit metadata > at > 
org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer$DeltaSyncService.lambda$startService$0(HoodieDeltaStreamer.java:678) > at > java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:750) > Caused by: org.apache.hudi.exception.HoodieException: Failed to read schema > from commit metadata > at > org.apache.hudi.common.table.TableSchemaResolver.getTableSchemaFromCommitMetadata(TableSchemaResolver.java:244) > at > org.apache.hudi.common.table.TableSchemaResolver.getTableSchemaFromCommitMetadata(TableSchemaResolver.java:215) > at > org.apache.hudi.common.table.TableSchemaResolver.getTableAvroSchema(TableSchemaResolver.java:140) > at > org.apache.hudi.common.table.TableSchemaResolver.getTableAvroSchema(TableSchemaResolver.java:129) > at > org.apache.hudi.SparkHoodieTableFileIndex.$anonfun$schema$1(SparkHoodieTableFileIndex.scala:78) > at scala.Option.getOrElse(Option.scala:189) > at > org.apache.hudi.SparkHoodieTableFileIndex.schema$lzycompute(SparkHoodieTableFileIndex.scala:76) > at > org.apache.hudi.SparkHoodieTableFileIndex.schema(SparkHoodieTableFileIndex.scala:76) > at > org.apache.hudi.SparkHoodieTableFileIndex._partitionSchemaFromProperties$lzycompute(SparkHoodieTableFileIndex.scala:89) > at > org.apache.hudi.SparkHoodieTableFileIndex._partitionSchemaFromProperties(SparkHoodieTableFileIndex.scala:86) > at > org.apache.hudi.SparkHoodieTableFileIndex.partitionSchema(SparkHoodieTableFileIndex.scala:129) > at > org.apache.hudi.DefaultSource.getBaseFileOnlyView(DefaultSource.scala:205) > at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:121) > at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:69) > at > 
org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:350) > at > org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:274) > at > org.apache.spark.sql.DataFrameReader.$anonfun$load$3(DataFrameReader.scala:245) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:245) > at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:188) > at > org.apache.hudi.utilities.sources.S3EventsHoodieIncrSource.fetchNextBatch(S3EventsHoodieIncrSource.java:126) > at > org.apache.hudi.utilities.sources.RowSource.fetchNewData(RowSource.java:43) > at org.apache.hudi.utilities.sources.Source.fetchNext(Source.java:76) > at > org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter.fetchNewDataInAvroFormat(SourceFormatAdapter.java:69) > at > org.apache.hudi.utilities.deltastreamer.DeltaSync.fetchFromSource(DeltaSync.java:457) > at > org.apache.hudi.utilities.deltastreamer.DeltaSync.readFromSource(DeltaSync.java:390) > at > org.apache.hudi.utilities.deltastreamer.DeltaSync.syncOnce(DeltaSync.java:298) > at > org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer$DeltaSyncService.lambda$startService$0(HoodieDeltaStreamer.java:652) > ... 4 more > Caused by: org.apache.avro.AvroRuntimeException: Not a record: "null" > at org.apache.avro.Schema.getFields(Schema.java:279) > at > org.apache.hudi.avro.HoodieAvroUtils.addMetadataFields(HoodieAvroUtils.java:207) > at > org.apache.hudi.common.table.TableSchemaResolver.getTableSchemaFromCommitMetadata(TableSchemaResolver.java:240) > ... 31 more > 22/02/16 12:10:36 INFO HoodieDeltaStreamer: DeltaSync shutdown. Closing write > client. 
Error?true > 22/02/16 12:10:36 INFO DeltaSync: Shutting down embedded timeline server > 22/02/16 12:10:37 INFO SparkUI: Stopped Spark web UI at > [http://ds-job-s3-incr-4d8f612b-0cdd-4c3b-62ba4f7f0270638e-driver-svc.harshal-feb-testing.svc:4040|http://ds-job-s3-incr-4d8f612b-0cdd-4c3b-62ba4f7f0270638e-driver-svc.harshal-feb-testing.svc:4040/] > 22/02/16 12:10:37 INFO KubernetesClusterSchedulerBackend: Shutting down all > executors > 22/02/16 12:10:37 INFO > KubernetesClusterSchedulerBackend$KubernetesDriverEndpoint: Asking each > executor to shut down > 22/02/16 12:10:37 WARN ExecutorPodsWatchSnapshotSource: Kubernetes client has > been closed. > 22/02/16 12:10:38 INFO MapOutputTrackerMasterEndpoint: > MapOutputTrackerMasterEndpoint stopped! > 22/02/16 12:10:38 INFO MemoryStore: MemoryStore cleared > 22/02/16 12:10:38 INFO BlockManager: BlockManager stopped > 22/02/16 12:10:38 INFO BlockManagerMaster: BlockManagerMaster stopped > 22/02/16 12:10:38 INFO > OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: > OutputCommitCoordinator stopped! 
> 22/02/16 12:10:38 INFO SparkContext: Successfully stopped SparkContext > Exception in thread "main" org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: Failed to read schema from commit > metadata > at > org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.lambda$sync$1(HoodieDeltaStreamer.java:184) > at org.apache.hudi.common.util.Option.ifPresent(Option.java:96) > at > org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.sync(HoodieDeltaStreamer.java:179) > at > org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.main(HoodieDeltaStreamer.java:526) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) > at > org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:955) > at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180) > at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203) > at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90) > at > org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1043) > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1052) > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > Caused by: java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: Failed to read schema from commit > metadata > at > java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357) > at java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1908) > at > org.apache.hudi.async.HoodieAsyncService.waitForShutdown(HoodieAsyncService.java:89) > at > 
org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.lambda$sync$1(HoodieDeltaStreamer.java:182) > ... 15 more > Caused by: org.apache.hudi.exception.HoodieException: Failed to read schema > from commit metadata > at > org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer$DeltaSyncService.lambda$startService$0(HoodieDeltaStreamer.java:678) > at > java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:750) > Caused by: org.apache.hudi.exception.HoodieException: Failed to read schema > from commit metadata > at > org.apache.hudi.common.table.TableSchemaResolver.getTableSchemaFromCommitMetadata(TableSchemaResolver.java:244) > at > org.apache.hudi.common.table.TableSchemaResolver.getTableSchemaFromCommitMetadata(TableSchemaResolver.java:215) > at > org.apache.hudi.common.table.TableSchemaResolver.getTableAvroSchema(TableSchemaResolver.java:140) > at > org.apache.hudi.common.table.TableSchemaResolver.getTableAvroSchema(TableSchemaResolver.java:129) > at > org.apache.hudi.SparkHoodieTableFileIndex.$anonfun$schema$1(SparkHoodieTableFileIndex.scala:78) > at scala.Option.getOrElse(Option.scala:189) > at > org.apache.hudi.SparkHoodieTableFileIndex.schema$lzycompute(SparkHoodieTableFileIndex.scala:76) > at > org.apache.hudi.SparkHoodieTableFileIndex.schema(SparkHoodieTableFileIndex.scala:76) > at > org.apache.hudi.SparkHoodieTableFileIndex._partitionSchemaFromProperties$lzycompute(SparkHoodieTableFileIndex.scala:89) > at > org.apache.hudi.SparkHoodieTableFileIndex._partitionSchemaFromProperties(SparkHoodieTableFileIndex.scala:86) > at > org.apache.hudi.SparkHoodieTableFileIndex.partitionSchema(SparkHoodieTableFileIndex.scala:129) > at > org.apache.hudi.DefaultSource.getBaseFileOnlyView(DefaultSource.scala:205) > at 
org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:121) > at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:69) > at > org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:350) > at > org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:274) > at > org.apache.spark.sql.DataFrameReader.$anonfun$load$3(DataFrameReader.scala:245) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:245) > at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:188) > at > org.apache.hudi.utilities.sources.S3EventsHoodieIncrSource.fetchNextBatch(S3EventsHoodieIncrSource.java:126) > at > org.apache.hudi.utilities.sources.RowSource.fetchNewData(RowSource.java:43) > at org.apache.hudi.utilities.sources.Source.fetchNext(Source.java:76) > at > org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter.fetchNewDataInAvroFormat(SourceFormatAdapter.java:69) > at > org.apache.hudi.utilities.deltastreamer.DeltaSync.fetchFromSource(DeltaSync.java:457) > at > org.apache.hudi.utilities.deltastreamer.DeltaSync.readFromSource(DeltaSync.java:390) > at > org.apache.hudi.utilities.deltastreamer.DeltaSync.syncOnce(DeltaSync.java:298) > at > org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer$DeltaSyncService.lambda$startService$0(HoodieDeltaStreamer.java:652) > ... 4 more > Caused by: org.apache.avro.AvroRuntimeException: Not a record: "null" > at org.apache.avro.Schema.getFields(Schema.java:279) > at > org.apache.hudi.avro.HoodieAvroUtils.addMetadataFields(HoodieAvroUtils.java:207) > at > org.apache.hudi.common.table.TableSchemaResolver.getTableSchemaFromCommitMetadata(TableSchemaResolver.java:240) > ... 31 more -- This message was sent by Atlassian Jira (v8.20.1#820001)