[ 
https://issues.apache.org/jira/browse/HUDI-9363?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Davis Zhang updated HUDI-9363:
------------------------------
    Description: 
Hudi 1.x

Create a pristine Hudi table with a string record key, a string sec_key column, and
some data columns with array and map types.

Insert some values.

Create an RLI (record-level index) on the key column.

Create an index on the sec_key column; it will hit the issue shown below.

 

The example Hudi table data is provided in the attachment.

If you run:
{code:java}
// Import necessary packages
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.types._
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.hudi.config.HoodieIndexConfig
import org.apache.hudi.index.HoodieIndex
import org.apache.hudi.AvroConversionUtils.convertStructTypeToAvroSchema

val baseFilesPath = "/tmp/hudi-benchmark"
val parquetFilesPath = s"${baseFilesPath}/input"
// Define the schema based on your example record
val schema = StructType(Array(
  StructField("key", StringType, true),
  StructField("secKey", StringType, true),
  StructField("partition", StringType, true),
  StructField("ts", LongType, false),
  StructField("city", StringType, true),
  StructField("textField", StringType, true),
  StructField("decimalField", FloatType, false),
  StructField("longField", LongType, false),
  StructField("arrayField", ArrayType(IntegerType, true), true),
  StructField("mapField", MapType(StringType, IntegerType), true),
  StructField("round", IntegerType, false)))
// Path where you want to create the Hudi table
val hudiTablePath = s"$baseFilesPath/output"
val tableName = "hudi"

// Convert Spark schema to Avro schema
val avroSchema = convertStructTypeToAvroSchema(schema, s"${tableName}_records", s"hoodie.${tableName}")

// Define the key configurations for Hudi
val hudiOptions = Map[String, String](
  "hoodie.table.name" -> tableName,
  "hoodie.datasource.write.recordkey.field" -> "key",
  "hoodie.datasource.write.partitionpath.field" -> "round",
  "hoodie.datasource.write.table.name" -> tableName,
  "hoodie.datasource.write.operation" -> "insert",
  "hoodie.table.create.schema" -> avroSchema.toString(),
)
spark.sql(s"""
     |   CREATE TABLE IF NOT EXISTS hudi_1
     |   USING hudi
     |   LOCATION 'file://$hudiTablePath/default.hudi_1'
     | """)

spark.sql("refresh table hudi_1")
spark.sql("select * from hudi_1 limit 10").show()
spark.sql("select count(*) from hudi_1 group by round order by 1").show()
spark.sql("select count(*) from hudi_1 group by key order by 1").show()

spark.sql("SET hoodie.metadata.enable=true")
spark.sql("SET hoodie.metadata.index.async=true")
spark.sql("SET hoodie.metadata.record.index.enable=true")
spark.sql("SET hoodie.write.concurrency.mode=optimistic_concurrency_control")
spark.sql("SET hoodie.cleaner.policy.failed.writes=LAZY")
spark.sql("SET hoodie.write.lock.provider=org.apache.hudi.client.transaction.lock.InProcessLockProvider")
spark.sql("CREATE INDEX idx_secKey on hudi_1 (secKey)")
 {code}
you get:
{code:java}
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
        at org.apache.avro.Schema.getFields(Schema.java:283)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
        at 
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
        at 
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
        at 
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
        at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
        at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
        at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
        at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
        at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
        at 
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
        at 
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
        at org.apache.spark.scheduler.Task.run(Task.scala:141)
        at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
        at java.base/java.lang.Thread.run(Thread.java:829)
25/05/01 14:41:13 ERROR Executor: Exception in task 3.0 in stage 62.0 (TID 205)
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
        at org.apache.avro.Schema.getFields(Schema.java:283)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
        at 
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
        at 
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
        at 
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
        at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
        at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
        at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
        at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
        at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
        at 
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
        at 
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
        at org.apache.spark.scheduler.Task.run(Task.scala:141)
        at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
        at java.base/java.lang.Thread.run(Thread.java:829)
25/05/01 14:41:13 ERROR Executor: Exception in task 1.0 in stage 62.0 (TID 203)
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
        at org.apache.avro.Schema.getFields(Schema.java:283)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
        at 
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
        at 
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
        at 
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
        at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
        at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
        at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
        at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
        at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
        at 
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
        at 
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
        at org.apache.spark.scheduler.Task.run(Task.scala:141)
        at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
        at java.base/java.lang.Thread.run(Thread.java:829)
25/05/01 14:41:13 ERROR Executor: Exception in task 0.0 in stage 62.0 (TID 202)
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
        at org.apache.avro.Schema.getFields(Schema.java:283)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
        at 
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
        at 
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
        at 
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
        at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
        at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
        at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
        at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
        at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
        at 
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
        at 
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
        at org.apache.spark.scheduler.Task.run(Task.scala:141)
        at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
        at java.base/java.lang.Thread.run(Thread.java:829)
25/05/01 14:41:13 ERROR Executor: Exception in task 6.0 in stage 62.0 (TID 208)
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
        at org.apache.avro.Schema.getFields(Schema.java:283)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
        at 
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
        at 
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
        at 
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
        at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
        at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
        at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
        at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
        at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
        at 
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
        at 
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
        at org.apache.spark.scheduler.Task.run(Task.scala:141)
        at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
        at java.base/java.lang.Thread.run(Thread.java:829)
25/05/01 14:41:13 ERROR Executor: Exception in task 5.0 in stage 62.0 (TID 207)
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
        at org.apache.avro.Schema.getFields(Schema.java:283)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
        at 
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
        at 
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
        at 
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
        at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
        at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
        at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
        at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
        at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
        at 
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
        at 
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
        at org.apache.spark.scheduler.Task.run(Task.scala:141)
        at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
        at java.base/java.lang.Thread.run(Thread.java:829) {code}

  was:
Hudi 1.x

create pristine hudi table with record key string, sec_key column String, some 
data column with array and map type.

insert some value.

create RLI on key column

create index on sec_key column, it will hit issues

 
{code:java}
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
        at org.apache.avro.Schema.getFields(Schema.java:283)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
        at 
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
        at 
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
        at 
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
        at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
        at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
        at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
        at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
        at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
        at 
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
        at 
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
        at org.apache.spark.scheduler.Task.run(Task.scala:141)
        at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
        at java.base/java.lang.Thread.run(Thread.java:829)
25/05/01 14:41:13 ERROR Executor: Exception in task 3.0 in stage 62.0 (TID 205)
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
        at org.apache.avro.Schema.getFields(Schema.java:283)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
        at 
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
        at 
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
        at 
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
        at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
        at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
        at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
        at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
        at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
        at 
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
        at 
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
        at org.apache.spark.scheduler.Task.run(Task.scala:141)
        at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
        at java.base/java.lang.Thread.run(Thread.java:829)
25/05/01 14:41:13 ERROR Executor: Exception in task 1.0 in stage 62.0 (TID 203)
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
        at org.apache.avro.Schema.getFields(Schema.java:283)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
        at 
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
        at 
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
        at 
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
        at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
        at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
        at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
        at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
        at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
        at 
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
        at 
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
        at org.apache.spark.scheduler.Task.run(Task.scala:141)
        at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
        at java.base/java.lang.Thread.run(Thread.java:829)
25/05/01 14:41:13 ERROR Executor: Exception in task 0.0 in stage 62.0 (TID 202)
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
        at org.apache.avro.Schema.getFields(Schema.java:283)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
        at 
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
        at 
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
        at 
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
        at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
        at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
        at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
        at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
        at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
        at 
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
        at 
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
        at org.apache.spark.scheduler.Task.run(Task.scala:141)
        at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
        at java.base/java.lang.Thread.run(Thread.java:829)
25/05/01 14:41:13 ERROR Executor: Exception in task 6.0 in stage 62.0 (TID 208)
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
        at org.apache.avro.Schema.getFields(Schema.java:283)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
        at 
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
        at 
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
        at 
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
        at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
        at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
        at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
        at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
        at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
        at 
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
        at 
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
        at org.apache.spark.scheduler.Task.run(Task.scala:141)
        at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
        at java.base/java.lang.Thread.run(Thread.java:829)
25/05/01 14:41:13 ERROR Executor: Exception in task 5.0 in stage 62.0 (TID 207)
org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
        at org.apache.avro.Schema.getFields(Schema.java:283)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
        at 
org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
        at 
org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
        at 
org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
        at 
org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
        at 
org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
        at 
org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
        at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
        at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
        at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
        at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
        at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
        at 
org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
        at 
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
        at org.apache.spark.scheduler.Task.run(Task.scala:141)
        at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
        at java.base/java.lang.Thread.run(Thread.java:829) {code}


> Cannot create secondary index if table has array[int] or map column type
> ------------------------------------------------------------------------
>
>                 Key: HUDI-9363
>                 URL: https://issues.apache.org/jira/browse/HUDI-9363
>             Project: Apache Hudi
>          Issue Type: Bug
>            Reporter: Davis Zhang
>            Priority: Major
>
> Hudi 1.x
> Create a pristine Hudi table with a string record key, a String sec_key 
> column, and some data columns with array and map types.
> Insert some values.
> Create an RLI (record-level index) on the key column.
> Creating an index on the sec_key column will then hit the issue below.
>  
> The example Hudi table data is provided in the attachment.
> if you run
> {code:java}
> // Import necessary packagesimport org.apache.spark.sql.SaveModeimport 
> org.apache.spark.sql.types._import 
> org.apache.hudi.config.HoodieWriteConfigimport 
> org.apache.hudi.config.HoodieIndexConfigimport 
> org.apache.hudi.index.HoodieIndeximport 
> org.apache.hudi.AvroConversionUtils.convertStructTypeToAvroSchema
> val baseFilesPath = "/tmp/hudi-benchmark"val parquetFilesPath = 
> s"${baseFilesPath}/input"
> // Define the schema based on your example recordval schema = 
> StructType(Array(  StructField("key", StringType, true),  
> StructField("secKey", StringType, true),  StructField("partition", 
> StringType, true),  StructField("ts", LongType, false),  StructField("city", 
> StringType, true),  StructField("textField", StringType, true),  
> StructField("decimalField", FloatType, false),  StructField("longField", 
> LongType, false),  StructField("arrayField", ArrayType(IntegerType, true), 
> true),  StructField("mapField", MapType(StringType, IntegerType), true),  
> StructField("round", IntegerType, false)))
> // Path where you want to create the Hudi tableval hudiTablePath = 
> s"$baseFilesPath/output"
> val tableName = "hudi"
> // Convert Spark schema to Avro schemaval avroSchema = 
> convertStructTypeToAvroSchema(schema, s"${tableName}_records", 
> s"hoodie.${tableName}")
> // Define the key configurations for Hudival hudiOptions = Map[String, 
> String](  "hoodie.table.name" -> tableName,  
> "hoodie.datasource.write.recordkey.field" -> "key",  
> "hoodie.datasource.write.partitionpath.field" -> "round",  
> "hoodie.datasource.write.table.name" -> tableName,  
> "hoodie.datasource.write.operation" -> "insert",  
> "hoodie.table.create.schema" -> avroSchema.toString(),)
> spark.sql(s"""     |   CREATE TABLE IF NOT EXISTS hudi_1     |   USING hudi   
>   |   LOCATION 'file://$hudiTablePath/default.hudi_1'     | """)
> spark.sql("refresh table hudi_1")spark.sql("select * from hudi_1 limit 
> 10").show()spark.sql("select count(*) from hudi_1 group by round order by 
> 1").show()spark.sql("select count(*) from hudi_1 group by key order by 
> 1").show()
> spark.sql("SET hoodie.metadata.enable=true")spark.sql("SET 
> hoodie.metadata.index.async=true")spark.sql("SET 
> hoodie.metadata.record.index.enable=true")spark.sql("SET 
> hoodie.write.concurrency.mode=optimistic_concurrency_control")spark.sql("SET 
> hoodie.cleaner.policy.failed.writes=LAZY")spark.sql("SET 
> hoodie.write.lock.provider=org.apache.hudi.client.transaction.lock.InProcessLockProvider")
> spark.sql("CREATE INDEX idx_secKey on hudi_1 (secKey)")
>  {code}
> you get
> {code:java}
> org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
>       at org.apache.avro.Schema.getFields(Schema.java:283)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
>       at 
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
>       at 
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
>       at 
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
>       at 
> org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
>       at 
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
>       at 
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
>       at 
> org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
>       at 
> org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
>       at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
>       at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
>       at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
>       at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
>       at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
>       at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
>       at 
> org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
>       at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
>       at 
> org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
>       at org.apache.spark.scheduler.Task.run(Task.scala:141)
>       at 
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
>       at 
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
>       at 
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
>       at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
>       at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
>       at 
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
>       at 
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
>       at java.base/java.lang.Thread.run(Thread.java:829)
> 25/05/01 14:41:13 ERROR Executor: Exception in task 3.0 in stage 62.0 (TID 
> 205)
> org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
>       at org.apache.avro.Schema.getFields(Schema.java:283)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
>       at 
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
>       at 
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
>       at 
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
>       at 
> org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
>       at 
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
>       at 
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
>       at 
> org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
>       at 
> org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
>       at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
>       at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
>       at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
>       at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
>       at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
>       at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
>       at 
> org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
>       at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
>       at 
> org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
>       at org.apache.spark.scheduler.Task.run(Task.scala:141)
>       at 
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
>       at 
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
>       at 
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
>       at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
>       at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
>       at 
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
>       at 
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
>       at java.base/java.lang.Thread.run(Thread.java:829)
> 25/05/01 14:41:13 ERROR Executor: Exception in task 1.0 in stage 62.0 (TID 
> 203)
> org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
>       at org.apache.avro.Schema.getFields(Schema.java:283)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
>       at 
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
>       at 
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
>       at 
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
>       at 
> org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
>       at 
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
>       at 
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
>       at 
> org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
>       at 
> org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
>       at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
>       at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
>       at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
>       at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
>       at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
>       at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
>       at 
> org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
>       at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
>       at 
> org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
>       at org.apache.spark.scheduler.Task.run(Task.scala:141)
>       at 
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
>       at 
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
>       at 
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
>       at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
>       at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
>       at 
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
>       at 
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
>       at java.base/java.lang.Thread.run(Thread.java:829)
> 25/05/01 14:41:13 ERROR Executor: Exception in task 0.0 in stage 62.0 (TID 
> 202)
> org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
>       at org.apache.avro.Schema.getFields(Schema.java:283)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
>       at 
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
>       at 
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
>       at 
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
>       at 
> org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
>       at 
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
>       at 
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
>       at 
> org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
>       at 
> org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
>       at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
>       at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
>       at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
>       at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
>       at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
>       at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
>       at 
> org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
>       at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
>       at 
> org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
>       at org.apache.spark.scheduler.Task.run(Task.scala:141)
>       at 
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
>       at 
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
>       at 
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
>       at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
>       at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
>       at 
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
>       at 
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
>       at java.base/java.lang.Thread.run(Thread.java:829)
> 25/05/01 14:41:13 ERROR Executor: Exception in task 6.0 in stage 62.0 (TID 
> 208)
> org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
>       at org.apache.avro.Schema.getFields(Schema.java:283)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
>       at 
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
>       at 
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
>       at 
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
>       at 
> org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
>       at 
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
>       at 
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
>       at 
> org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
>       at 
> org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
>       at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
>       at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
>       at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
>       at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
>       at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
>       at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
>       at 
> org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
>       at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
>       at 
> org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
>       at org.apache.spark.scheduler.Task.run(Task.scala:141)
>       at 
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
>       at 
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
>       at 
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
>       at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
>       at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
>       at 
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
>       at 
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
>       at java.base/java.lang.Thread.run(Thread.java:829)
> 25/05/01 14:41:13 ERROR Executor: Exception in task 5.0 in stage 62.0 (TID 
> 207)
> org.apache.avro.AvroRuntimeException: Not a record: ["null","int"]
>       at org.apache.avro.Schema.getFields(Schema.java:283)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1236)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1249)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1258)
>       at 
> org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion(HoodieAvroUtils.java:1241)
>       at 
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:175)
>       at 
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:103)
>       at 
> org.apache.hudi.io.hadoop.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:82)
>       at 
> org.apache.hudi.common.table.log.HoodieFileSliceReader.<init>(HoodieFileSliceReader.java:56)
>       at 
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.createSecondaryIndexGenerator(SecondaryIndexRecordGenerationUtils.java:312)
>       at 
> org.apache.hudi.metadata.SecondaryIndexRecordGenerationUtils.lambda$readSecondaryKeysFromFileSlices$12f25254$1(SecondaryIndexRecordGenerationUtils.java:268)
>       at 
> org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:160)
>       at 
> org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
>       at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
>       at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
>       at org.apache.spark.util.Iterators$.size(Iterators.scala:29)
>       at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1787)
>       at org.apache.spark.rdd.RDD.$anonfun$count$1(RDD.scala:1296)
>       at org.apache.spark.rdd.RDD.$anonfun$count$1$adapted(RDD.scala:1296)
>       at 
> org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2433)
>       at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
>       at 
> org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
>       at org.apache.spark.scheduler.Task.run(Task.scala:141)
>       at 
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
>       at 
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
>       at 
> org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
>       at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
>       at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
>       at 
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
>       at 
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
>       at java.base/java.lang.Thread.run(Thread.java:829) {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to