voonhous commented on issue #17938:
URL: https://github.com/apache/hudi/issues/17938#issuecomment-3776309973

   If we make MercifulJsonConverter return an Avro `Utf8` (`org.apache.avro.util.Utf8`) instead of a `java.lang.String`, this error will be thrown:
   
   
   
   ```log
   [ERROR]   Run 4: TestJsonKafkaSource.testJsonKafkaSourceWithEncodedDecimals 
» Spark Job aborted due to stage failure: Task 0 in stage 2.0 failed 1 times, 
most recent failure: Lost task 0.0 in stage 2.0 (TID 6) (be008c538ca3 executor 
driver): org.apache.spark.SparkRuntimeException: Error while encoding: 
java.lang.RuntimeException: org.apache.avro.util.Utf8 is not a valid external 
type for schema of string
   
assertnotnull(validateexternaltype(getexternalrowfield(assertnotnull(input[0, 
org.apache.spark.sql.Row, true]), 0, timestamp), LongType, ObjectType(class 
java.lang.Long)).longValue) AS timestamp#38958L
   assertnotnull(staticinvoke(class org.apache.spark.unsafe.types.UTF8String, 
StringType, fromString, 
validateexternaltype(getexternalrowfield(assertnotnull(input[0, 
org.apache.spark.sql.Row, true]), 1, _row_key), StringType, ObjectType(class 
java.lang.String)), true, false, true)) AS _row_key#38959
   assertnotnull(staticinvoke(class org.apache.spark.unsafe.types.UTF8String, 
StringType, fromString, 
validateexternaltype(getexternalrowfield(assertnotnull(input[0, 
org.apache.spark.sql.Row, true]), 2, rider), StringType, ObjectType(class 
java.lang.String)), true, false, true)) AS rider#38960
   assertnotnull(CheckOverflow(staticinvoke(class 
org.apache.spark.sql.types.Decimal$, DecimalType(10,6), fromDecimal, 
validateexternaltype(getexternalrowfield(assertnotnull(input[0, 
org.apache.spark.sql.Row, true]), 3, decfield), DecimalType(10,6), 
ObjectType(class java.lang.Object)), true, false, true), DecimalType(10,6))) AS 
decfield#38961
   assertnotnull(CheckOverflow(staticinvoke(class 
org.apache.spark.sql.types.Decimal$, DecimalType(4,2), fromDecimal, 
validateexternaltype(getexternalrowfield(assertnotnull(input[0, 
org.apache.spark.sql.Row, true]), 4, lowprecision), DecimalType(4,2), 
ObjectType(class java.lang.Object)), true, false, true), DecimalType(4,2))) AS 
lowprecision#38962
   assertnotnull(CheckOverflow(staticinvoke(class 
org.apache.spark.sql.types.Decimal$, DecimalType(32,12), fromDecimal, 
validateexternaltype(getexternalrowfield(assertnotnull(input[0, 
org.apache.spark.sql.Row, true]), 5, highprecision), DecimalType(32,12), 
ObjectType(class java.lang.Object)), true, false, true), DecimalType(32,12))) 
AS highprecision#38963
   assertnotnull(staticinvoke(class org.apache.spark.unsafe.types.UTF8String, 
StringType, fromString, 
validateexternaltype(getexternalrowfield(assertnotnull(input[0, 
org.apache.spark.sql.Row, true]), 6, driver), StringType, ObjectType(class 
java.lang.String)), true, false, true)) AS driver#38964
   
assertnotnull(validateexternaltype(getexternalrowfield(assertnotnull(input[0, 
org.apache.spark.sql.Row, true]), 7, fare), DoubleType, ObjectType(class 
java.lang.Double)).doubleValue) AS fare#38965
   
assertnotnull(validateexternaltype(getexternalrowfield(assertnotnull(input[0, 
org.apache.spark.sql.Row, true]), 8, _hoodie_is_deleted), BooleanType, 
ObjectType(class java.lang.Boolean)).booleanValue) AS _hoodie_is_deleted#38966.
        at 
org.apache.spark.sql.errors.QueryExecutionErrors$.expressionEncodingError(QueryExecutionErrors.scala:1416)
        at 
org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Serializer.apply(ExpressionEncoder.scala:217)
        at 
org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Serializer.apply(ExpressionEncoder.scala:200)
        at scala.collection.Iterator$$anon$10.next(Iterator.scala:461)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.hashAgg_doAggregateWithoutKey_0$(Unknown
 Source)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown
 Source)
        at 
org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
        at 
org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:43)
        at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
        at 
org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140)
        at 
org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
        at 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:104)
        at 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:54)
        at 
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
        at org.apache.spark.scheduler.Task.run(Task.scala:141)
        at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
        at 
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
        at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
        at java.base/java.lang.Thread.run(Thread.java:829)
   Caused by: java.lang.RuntimeException: org.apache.avro.util.Utf8 is not a 
valid external type for schema of string
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.writeFields_0_0$(Unknown
 Source)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown
 Source)
        at 
org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Serializer.apply(ExpressionEncoder.scala:214)
        ... 21 more
   ```
   
   A bunch of other errors will be thrown too:
   ```
   [ERROR] 
org.apache.hudi.utilities.sources.helpers.TestMercifulJsonToRowConverterJava8Api.basicConversion
   [ERROR]   Run 1: 
TestMercifulJsonToRowConverterJava8Api>TestMercifulJsonToRowConverterBase.basicConversion:100->TestMercifulJsonToRowConverterBase.validateSchemaCompatibility:690
 » IllegalArgument The value (John Smith) of the type 
(org.apache.avro.util.Utf8) cannot be converted to the string type
   [ERROR]   Run 2: 
TestMercifulJsonToRowConverterJava8Api>TestMercifulJsonToRowConverterBase.basicConversion:100->TestMercifulJsonToRowConverterBase.validateSchemaCompatibility:690
 » IllegalArgument The value (John Smith) of the type 
(org.apache.avro.util.Utf8) cannot be converted to the string type
   [ERROR]   Run 3: 
TestMercifulJsonToRowConverterJava8Api>TestMercifulJsonToRowConverterBase.basicConversion:100->TestMercifulJsonToRowConverterBase.validateSchemaCompatibility:690
 » IllegalArgument The value (John Smith) of the type 
(org.apache.avro.util.Utf8) cannot be converted to the string type
   [ERROR]   Run 4: 
TestMercifulJsonToRowConverterJava8Api>TestMercifulJsonToRowConverterBase.basicConversion:100->TestMercifulJsonToRowConverterBase.validateSchemaCompatibility:690
 » IllegalArgument The value (John Smith) of the type 
(org.apache.avro.util.Utf8) cannot be converted to the string type
   [INFO] 
   [ERROR] 
org.apache.hudi.utilities.sources.helpers.TestMercifulJsonToRowConverterJava8Api.conversionWithFieldNameAliases
   [ERROR]   Run 1: 
TestMercifulJsonToRowConverterJava8Api>TestMercifulJsonToRowConverterBase.conversionWithFieldNameAliases:627->TestMercifulJsonToRowConverterBase.validateSchemaCompatibility:690
 » IllegalArgument The value (John Smith) of the type 
(org.apache.avro.util.Utf8) cannot be converted to the string type
   [ERROR]   Run 2: 
TestMercifulJsonToRowConverterJava8Api>TestMercifulJsonToRowConverterBase.conversionWithFieldNameAliases:627->TestMercifulJsonToRowConverterBase.validateSchemaCompatibility:690
 » IllegalArgument The value (John Smith) of the type 
(org.apache.avro.util.Utf8) cannot be converted to the string type
   [ERROR]   Run 3: 
TestMercifulJsonToRowConverterJava8Api>TestMercifulJsonToRowConverterBase.conversionWithFieldNameAliases:627->TestMercifulJsonToRowConverterBase.validateSchemaCompatibility:690
 » IllegalArgument The value (John Smith) of the type 
(org.apache.avro.util.Utf8) cannot be converted to the string type
   [ERROR]   Run 4: 
TestMercifulJsonToRowConverterJava8Api>TestMercifulJsonToRowConverterBase.conversionWithFieldNameAliases:627->TestMercifulJsonToRowConverterBase.validateSchemaCompatibility:690
 » IllegalArgument The value (John Smith) of the type 
(org.apache.avro.util.Utf8) cannot be converted to the string type
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to