liiang-huang commented on issue #12261: URL: https://github.com/apache/hudi/issues/12261#issuecomment-2660709543
Hi @ad1happy2go @rangareddy This is still happening sometimes, there are not many logs other than heartbeat SaveIntoDataSourceCommand org.apache.hudi.Spark31DefaultSource@4fed7de0, Map(hoodie.payload.ordering.field -> daas_internal_ts, hoodie.datasource.hive_sync.database -> pay2bronze, hoodie.datasource.hive_sync.mode -> GLUE, hoodie.filesystem.view.incr.timeline.sync.enable -> false, hoodie.schema.on.read.enable -> true, path -> s3://pay2-datalake-prod-standard/datasets/bronze/paylite-payment-db1-w-slave-20220523-aurora-prod/paylite_payment/sub_payments-1661338391, hoodie.compact.inline.max.delta.seconds -> 3600, hoodie.datasource.write.precombine.field -> daas_internal_ts, hoodie.datasource.write.payload.class -> jp.ne.paypay.daas.data.util.DaaSOverwritePayload, hoodie.compact.inline.trigger.strategy -> NUM_OR_TIME, hoodie.cleaner.fileversions.retained -> 6, hoodie.datasource.meta.sync.enable -> true, hoodie.write.commit.callback.on -> true, hoodie.metadata.enable -> true, hoodie.datasource.hive_sync.table -> paylite_payment_sub_payments, hoodie.datasource.meta_sync.condition.sync -> false, hoodie.write.commit.callback.class -> jp.ne.paypay.daas.data.metrics.DaasHudiWriteCommitCallback, hoodie.index.type -> BLOOM, hoodie.datasource.write.operation -> upsert, hoodie.rollback.using.markers -> false, hoodie.metrics.reporter.type -> CLOUDWATCH, hoodie.datasource.write.recordkey.field -> id, hoodie.table.name -> paylite_payment_sub_payments, hoodie.datasource.write.table.type -> MERGE_ON_READ, hoodie.datasource.write.hive_style_partitioning -> true, hoodie.datasource.write.table.name -> paylite_payment_sub_payments, hoodie.cleaner.policy -> KEEP_LATEST_FILE_VERSIONS, hoodie.write.markers.type -> DIRECT, hoodie.compact.inline -> true, hoodie.datasource.compaction.async.enable -> false, hoodie.metrics.on -> true, hoodie.upsert.shuffle.parallelism -> 200, hoodie.meta.sync.client.tool.class -> org.apache.hudi.aws.sync.AwsGlueCatalogSyncTool, 
hoodie.datasource.write.partitionpath.field -> daas_date, hoodie.compact.inline.max.delta.commits -> 1, hoodie.payload.event.time.field -> daas_internal_ts) Spark config: ``` spark.driver.memory | 80g -- | -- spark.driver.port | 35617 spark.dynamicAllocation.enabled | true spark.dynamicAllocation.executorIdleTimeout | 600s spark.dynamicAllocation.initialExecutors | 5 spark.dynamicAllocation.maxExecutors | 19 spark.dynamicAllocation.minExecutors | 3 spark.dynamicAllocation.shuffleTracking.enabled | true spark.eventLog.dir | /tmp/spark-event-logs/ spark.eventLog.enabled | true spark.executor.cores | 24 spark.executor.extraClassPath | /tmp:/opt/amazon/conf:/opt/amazon/glue-manifest.jar spark.executor.heartbeatInterval | 3000s spark.executor.id | driver spark.executor.memory | 96g spark.executor.memoryOverhead | 12g spark.extraListeners | com.amazonaws.services.glueexceptionanalysis.GlueExceptionAnalysisListener spark.files.overwrite | true spark.glue.connection-names | daas_ingester_connection spark.glue.enable-continuous-cloudwatch-log | false spark.glue.enable-continuous-log-filter | true spark.glue.enable-job-insights | true spark.glue.endpoint | https://glue-jes.ap-northeast-1.amazonaws.com spark.glue.extra-files | s3://pay2-datalake-prod-scripts/daas/log4jproperties/WARN/log4j.properties spark.glue.extra-jars | s3://pay2-datalake-prod-scripts/daas/libs/daas-data-core-assembly-latest.jar spark.glue.GLUE_COMMAND_CRITERIA | glueetl spark.glue.GLUE_TASK_GROUP_ID | 8944fe9d-6a5d-449b-8fe7-8b160959b19b spark.glue.GLUE_VERSION | 3.0 spark.glue.java-options | -XX:+UseCompressedOops -XX:+UseG1GC -XX:+PrintReferenceGC -verbose:gc -XX:+PrintGCDetails spark.glue.JOB_NAME | paylite_payment_sub_payments-binlog-ingester-1661338391 spark.glue.JOB_RUN_ID | jr_35d97af6b19f1fd8227f7a2db329e1e90fc05bbfebf90125180de591acb7298d spark.glue.USE_PROXY | true spark.glue.user-jars-first | false spark.glueAppInsightsLog.dir | /tmp/glue-app-insights-logs/ 
spark.glueExceptionAnalysisEventLog.dir | /tmp/glue-exception-analysis-logs/ spark.glueJobInsights.enabled | true spark.hadoop.aws.glue.endpoint | https://glue.ap-northeast-1.amazonaws.com spark.hadoop.aws.glue.proxy.host | 169.254.76.0 spark.hadoop.aws.glue.proxy.port | 8888 spark.hadoop.fs.s3.buffer.dir | /tmp/hadoop-spark/s3 spark.hadoop.fs.s3.impl | com.amazon.ws.emr.hadoop.fs.EmrFileSystem spark.hadoop.glue.michiganCredentialsProviderProxy | com.amazonaws.services.glue.remote.LakeformationCredentialsProvider spark.hadoop.hive.metastore.client.factory.class | com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory spark.hadoop.hive.metastore.warehouse.dir | /tmp/spark-warehouse spark.hadoop.lakeformation.credentials.url | http://169.254.76.0:9998/lakeformationcredentials spark.hadoop.mapred.output.committer.class | org.apache.hadoop.mapred.DirectOutputCommitter spark.hadoop.mapred.output.direct.EmrFileSystem | true spark.hadoop.mapred.output.direct.NativeS3FileSystem | true spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version | 2 spark.hadoop.mapreduce.fileoutputcommitter.marksuccessfuljobs | false spark.hadoop.parquet.enable.summary-metadata | false spark.jars | CanalBinlog2Hudi.scala.jar spark.kryo.registrator | org.apache.spark.HoodieSparkKryoRegistrar spark.master | jes spark.metrics.conf.*.sink.GlueCloudwatch.class | org.apache.spark.metrics.sink.GlueCloudwatchSink spark.metrics.conf.*.sink.GlueCloudwatch.jobName | paylite_payment_sub_payments-binlog-ingester-1661338391 spark.metrics.conf.*.sink.GlueCloudwatch.jobRunId | jr_35d97af6b19f1fd8227f7a2db329e1e90fc05bbfebf90125180de591acb7298d spark.metrics.conf.*.sink.GlueCloudwatch.namespace | Glue spark.metrics.conf.*.source.jvm.class | org.apache.spark.metrics.source.JvmSource spark.metrics.conf.*.source.s3.class | org.apache.spark.metrics.source.S3FileSystemSource spark.metrics.conf.*.source.system.class | org.apache.spark.metrics.source.SystemMetricsSource 
spark.metrics.conf.driver.source.aggregate.class | org.apache.spark.metrics.source.AggregateMetricsSource spark.network.timeout | 3100s spark.pyFiles | spark.pyspark.python | /usr/bin/python3 spark.rpc.askTimeout | 600 spark.scheduler.mode | FIFO spark.serializer | org.apache.spark.serializer.KryoSerializer spark.shuffle.service.enable | false spark.shuffle.service.enabled | false spark.sql.avro.datetimeRebaseModeInRead | CORRECTED spark.sql.avro.datetimeRebaseModeInWrite | CORRECTED spark.sql.catalogImplementation | hive spark.sql.extensions | org.apache.spark.sql.hudi.HoodieSparkSessionExtension spark.sql.legacy.avro.datetimeRebaseModeInRead | CORRECTED spark.sql.legacy.avro.datetimeRebaseModeInWrite | CORRECTED spark.sql.legacy.parquet.datetimeRebaseModeInRead | CORRECTED spark.sql.legacy.parquet.datetimeRebaseModeInWrite | CORRECTED spark.sql.legacy.parquet.int96RebaseModeInRead | CORRECTED spark.sql.legacy.parquet.int96RebaseModeInWrite | CORRECTED spark.sql.parquet.datetimeRebaseModeInRead | CORRECTED spark.sql.parquet.datetimeRebaseModeInWrite | CORRECTED spark.sql.parquet.fs.optimized.committer.optimization-enabled | true spark.sql.parquet.int96RebaseModeInRead | CORRECTED spark.sql.parquet.int96RebaseModeInWrite | CORRECTED spark.sql.parquet.output.committer.class | com.amazon.emr.committer.EmrOptimizedSparkSqlParquetOutputCommitter spark.sql.shuffle.partitions | 1500 spark.ui.enabled | false spark.unsafe.sorter.spill.read.ahead.enabled | false ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
