[
https://issues.apache.org/jira/browse/HUDI-9363?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Davis Zhang updated HUDI-9363:
------------------------------
Description:
Hudi 1.x
Create a pristine Hudi table with a String record key, a String sec_key column, and some
data columns with array and map types.
Insert some values.
Create an RLI (record-level index) on the key column.
Creating a secondary index on the sec_key column will then hit the issue below.
Example Hudi table data is provided in the attachment.
if you run
{code:java}
// Import necessary packages
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.types._
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.hudi.config.HoodieIndexConfig
import org.apache.hudi.index.HoodieIndex
import org.apache.hudi.AvroConversionUtils.convertStructTypeToAvroSchema
val baseFilesPath = "/tmp/hudi-benchmark"
val parquetFilesPath = s"${baseFilesPath}/input"
// Define the schema based on your example record
val schema = StructType(Array(
  StructField("key", StringType, true),
  StructField("secKey", StringType, true),
  StructField("partition", StringType, true),
  StructField("ts", LongType, false),
  StructField("city", StringType, true),
  StructField("textField", StringType, true),
  StructField("decimalField", FloatType, false),
  StructField("longField", LongType, false),
  StructField("arrayField", ArrayType(IntegerType, true), true),
  StructField("mapField", MapType(StringType, IntegerType), true),
  StructField("round", IntegerType, false)))
// Path where you want to create the Hudi table
val hudiTablePath = s"$baseFilesPath/output"
val tableName = "hudi"
// Convert Spark schema to Avro schema
val avroSchema = convertStructTypeToAvroSchema(schema, s"${tableName}_records", s"hoodie.${tableName}")
// Define the key configurations for Hudi
val hudiOptions = Map[String, String](
  "hoodie.table.name" -> tableName,
  "hoodie.datasource.write.recordkey.field" -> "key",
  "hoodie.datasource.write.partitionpath.field" -> "round",
  "hoodie.datasource.write.table.name" -> tableName,
  "hoodie.datasource.write.operation" -> "insert",
  "hoodie.table.create.schema" -> avroSchema.toString(),
)
spark.sql(s"""
  | CREATE TABLE IF NOT EXISTS hudi_1
  | USING hudi
  | LOCATION 'file://$hudiTablePath/default.hudi_1'
  | """)
spark.sql("refresh table hudi_1")
spark.sql("select * from hudi_1 limit 10").show()
spark.sql("select count(*) from hudi_1 group by round order by 1").show()
spark.sql("select count(*) from hudi_1 group by key order by 1").show()
spark.sql("SET hoodie.metadata.enable=true")
spark.sql("SET hoodie.metadata.index.async=true")
spark.sql("SET hoodie.metadata.record.index.enable=true")
spark.sql("SET hoodie.write.concurrency.mode=optimistic_concurrency_control")
spark.sql("SET hoodie.cleaner.policy.failed.writes=LAZY")
spark.sql("SET hoodie.write.lock.provider=org.apache.hudi.client.transaction.lock.InProcessLockProvider")
spark.sql("CREATE INDEX idx_secKey on hudi_1 (secKey)")
{code}
you get
{code:java}
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
at org.apache.avro.Schema.getFields(Schema.java:283)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
at
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
at
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
at
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
at
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
at org.apache.spark.scheduler.Task.run(Task.scala:141)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
25/05/01 14:41:13 ERROR Executor: Exception in task 3.0 in stage 62.0 (TID 205)
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
at org.apache.avro.Schema.getFields(Schema.java:283)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
at
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
at
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
at
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
at
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
at org.apache.spark.scheduler.Task.run(Task.scala:141)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
25/05/01 14:41:13 ERROR Executor: Exception in task 1.0 in stage 62.0 (TID 203)
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
at org.apache.avro.Schema.getFields(Schema.java:283)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
at
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
at
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
at
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
at
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
at org.apache.spark.scheduler.Task.run(Task.scala:141)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
25/05/01 14:41:13 ERROR Executor: Exception in task 0.0 in stage 62.0 (TID 202)
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
at org.apache.avro.Schema.getFields(Schema.java:283)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
at
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
at
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
at
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
at
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
at org.apache.spark.scheduler.Task.run(Task.scala:141)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
25/05/01 14:41:13 ERROR Executor: Exception in task 6.0 in stage 62.0 (TID 208)
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
at org.apache.avro.Schema.getFields(Schema.java:283)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
at
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
at
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
at
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
at
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
at org.apache.spark.scheduler.Task.run(Task.scala:141)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
25/05/01 14:41:13 ERROR Executor: Exception in task 5.0 in stage 62.0 (TID 207)
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
at org.apache.avro.Schema.getFields(Schema.java:283)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
at
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
at
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
at
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
at
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
at org.apache.spark.scheduler.Task.run(Task.scala:141)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829) {code}
was:
Hudi 1.x
create pristine hudi table with record key string, sec_key column String, some
data column with array and map type.
insert some value.
create RLI on key column
create index on sec_key column, it will hit issues
{code:java}
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
at org.apache.avro.Schema.getFields(Schema.java:283)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
at
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
at
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
at
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
at
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
at org.apache.spark.scheduler.Task.run(Task.scala:141)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
25/05/01 14:41:13 ERROR Executor: Exception in task 3.0 in stage 62.0 (TID 205)
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
at org.apache.avro.Schema.getFields(Schema.java:283)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
at
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
at
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
at
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
at
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
at org.apache.spark.scheduler.Task.run(Task.scala:141)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
25/05/01 14:41:13 ERROR Executor: Exception in task 1.0 in stage 62.0 (TID 203)
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
at org.apache.avro.Schema.getFields(Schema.java:283)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
at
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
at
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
at
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
at
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
at org.apache.spark.scheduler.Task.run(Task.scala:141)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
25/05/01 14:41:13 ERROR Executor: Exception in task 0.0 in stage 62.0 (TID 202)
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
at org.apache.avro.Schema.getFields(Schema.java:283)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
at
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
at
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
at
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
at
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
at org.apache.spark.scheduler.Task.run(Task.scala:141)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
25/05/01 14:41:13 ERROR Executor: Exception in task 6.0 in stage 62.0 (TID 208)
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
at org.apache.avro.Schema.getFields(Schema.java:283)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
at
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
at
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
at
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
at
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
at org.apache.spark.scheduler.Task.run(Task.scala:141)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
25/05/01 14:41:13 ERROR Executor: Exception in task 5.0 in stage 62.0 (TID 207)
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
at org.apache.avro.Schema.getFields(Schema.java:283)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
at
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
at
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
at
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
at
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
at
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
at
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
at
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
at org.apache.spark.scheduler.Task.run(Task.scala:141)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829) {code}
> Cannot create secondary index if table has array[int] or map column type
> ------------------------------------------------------------------------
>
> Key: HUDI-9363
> URL: https://issues.apache.org/jira/browse/HUDI-9363
> Project: Apache Hudi
> Issue Type: Bug
> Reporter: Davis Zhang
> Priority: Major
>
> Hudi 1.x
> Create a pristine Hudi table with a string record key, a String sec_key column,
> and some data columns with array and map types.
> Insert some values.
> Create RLI on the key column.
> Create an index on the sec_key column; it will hit issues.
>
> I provide the example Hudi table data in the attachment.
> if you run
> {code:java}
> // Import necessary packages
> import org.apache.spark.sql.SaveMode
> import org.apache.spark.sql.types._
> import org.apache.hudi.config.HoodieWriteConfig
> import org.apache.hudi.config.HoodieIndexConfig
> import org.apache.hudi.index.HoodieIndex
> import org.apache.hudi.AvroConversionUtils.convertStructTypeToAvroSchema
> val baseFilesPath = "/tmp/hudi-benchmark"
> val parquetFilesPath = s"${baseFilesPath}/input"
> // Define the schema based on your example record
> val schema = StructType(Array(
>   StructField("key", StringType, true),
>   StructField("secKey", StringType, true),
>   StructField("partition", StringType, true),
>   StructField("ts", LongType, false),
>   StructField("city", StringType, true),
>   StructField("textField", StringType, true),
>   StructField("decimalField", FloatType, false),
>   StructField("longField", LongType, false),
>   StructField("arrayField", ArrayType(IntegerType, true), true),
>   StructField("mapField", MapType(StringType, IntegerType), true),
>   StructField("round", IntegerType, false)))
> // Path where you want to create the Hudi table
> val hudiTablePath = s"$baseFilesPath/output"
> val tableName = "hudi"
> // Convert Spark schema to Avro schema
> val avroSchema = convertStructTypeToAvroSchema(schema, s"${tableName}_records",
>   s"hoodie.${tableName}")
> // Define the key configurations for Hudi
> val hudiOptions = Map[String, String](
>   "hoodie.table.name" -> tableName,
>   "hoodie.datasource.write.recordkey.field" -> "key",
>   "hoodie.datasource.write.partitionpath.field" -> "round",
>   "hoodie.datasource.write.table.name" -> tableName,
>   "hoodie.datasource.write.operation" -> "insert",
>   "hoodie.table.create.schema" -> avroSchema.toString(),
> )
> spark.sql(
>   s"""
>      | CREATE TABLE IF NOT EXISTS hudi_1
>      | USING hudi
>      | LOCATION 'file://$hudiTablePath/default.hudi_1'
>      | """)
> spark.sql("refresh table hudi_1")
> spark.sql("select * from hudi_1 limit 10").show()
> spark.sql("select count(*) from hudi_1 group by round order by 1").show()
> spark.sql("select count(*) from hudi_1 group by key order by 1").show()
> spark.sql("SET hoodie.metadata.enable=true")
> spark.sql("SET hoodie.metadata.index.async=true")
> spark.sql("SET hoodie.metadata.record.index.enable=true")
> spark.sql("SET hoodie.write.concurrency.mode=optimistic_concurrency_control")
> spark.sql("SET hoodie.cleaner.policy.failed.writes=LAZY")
> spark.sql("SET hoodie.write.lock.provider=org.apache.hudi.client.transaction.lock.InProcessLockProvider")
> spark.sql("CREATE INDEX idx_secKey on hudi_1 (secKey)")
> {code}
> you get
> {code:java}
> org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
> at org.apache.avro.Schema.getFields(Schema.java:283)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
> at
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
> at
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
> at
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
> at
> org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
> at
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
> at
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
> at
> org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
> at
> org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
> at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
> at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
> at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
> at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
> at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
> at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
> at
> org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
> at
> org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
> at org.apache.spark.scheduler.Task.run(Task.scala:141)
> at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
> at
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
> at
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
> at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
> at java.base/java.lang.Thread.run(Thread.java:829)
> 25/05/01 14:41:13 ERROR Executor: Exception in task 3.0 in stage 62.0 (TID
> 205)
> org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
> at org.apache.avro.Schema.getFields(Schema.java:283)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
> at
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
> at
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
> at
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
> at
> org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
> at
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
> at
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
> at
> org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
> at
> org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
> at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
> at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
> at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
> at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
> at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
> at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
> at
> org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
> at
> org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
> at org.apache.spark.scheduler.Task.run(Task.scala:141)
> at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
> at
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
> at
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
> at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
> at java.base/java.lang.Thread.run(Thread.java:829)
> 25/05/01 14:41:13 ERROR Executor: Exception in task 1.0 in stage 62.0 (TID
> 203)
> org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
> at org.apache.avro.Schema.getFields(Schema.java:283)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
> at
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
> at
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
> at
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
> at
> org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
> at
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
> at
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
> at
> org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
> at
> org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
> at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
> at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
> at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
> at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
> at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
> at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
> at
> org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
> at
> org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
> at org.apache.spark.scheduler.Task.run(Task.scala:141)
> at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
> at
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
> at
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
> at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
> at java.base/java.lang.Thread.run(Thread.java:829)
> 25/05/01 14:41:13 ERROR Executor: Exception in task 0.0 in stage 62.0 (TID
> 202)
> org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
> at org.apache.avro.Schema.getFields(Schema.java:283)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
> at
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
> at
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
> at
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
> at
> org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
> at
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
> at
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
> at
> org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
> at
> org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
> at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
> at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
> at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
> at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
> at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
> at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
> at
> org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
> at
> org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
> at org.apache.spark.scheduler.Task.run(Task.scala:141)
> at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
> at
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
> at
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
> at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
> at java.base/java.lang.Thread.run(Thread.java:829)
> 25/05/01 14:41:13 ERROR Executor: Exception in task 6.0 in stage 62.0 (TID
> 208)
> org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
> at org.apache.avro.Schema.getFields(Schema.java:283)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
> at
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
> at
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
> at
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
> at
> org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
> at
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
> at
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
> at
> org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
> at
> org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
> at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
> at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
> at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
> at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
> at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
> at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
> at
> org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
> at
> org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
> at org.apache.spark.scheduler.Task.run(Task.scala:141)
> at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
> at
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
> at
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
> at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
> at java.base/java.lang.Thread.run(Thread.java:829)
> 25/05/01 14:41:13 ERROR Executor: Exception in task 5.0 in stage 62.0 (TID
> 207)
> org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
> at org.apache.avro.Schema.getFields(Schema.java:283)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
> at
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
> at
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
> at
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
> at
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
> at
> org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
> at
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
> at
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
> at
> org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
> at
> org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
> at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
> at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
> at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
> at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
> at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
> at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
> at
> org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
> at
> org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
> at org.apache.spark.scheduler.Task.run(Task.scala:141)
> at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
> at
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
> at
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
> at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
> at java.base/java.lang.Thread.run(Thread.java:829) {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)