[ https://issues.apache.org/jira/browse/HUDI-1078?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
wangxianghu resolved HUDI-1078. ------------------------------- Resolution: Fixed > Fix IllegalArgumentException in Delete data demo of Quick-Start Guide > --------------------------------------------------------------------- > > Key: HUDI-1078 > URL: https://issues.apache.org/jira/browse/HUDI-1078 > Project: Apache Hudi > Issue Type: Bug > Reporter: wangxianghu > Assignee: Trevorzhang > Priority: Minor > Labels: pull-request-available > Fix For: 0.6.0 > > > When running the [Delete data|#deletes] demo in the Quick-Start Guide, I got this > exception: > {code:java} > java.lang.IllegalArgumentException: Field "partitionPath" does not > exist.{code} > Steps to reproduce: > {code:java} > // spark-shell > spark-2.4.4-bin-hadoop2.7/bin/spark-shell \ > --packages > org.apache.hudi:hudi-spark-bundle_2.11:0.5.3,org.apache.spark:spark-avro_2.11:2.4.4 > \ > --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' > import org.apache.hudi.QuickstartUtils._ > import scala.collection.JavaConversions._ > import org.apache.spark.sql.SaveMode._ > import org.apache.hudi.DataSourceReadOptions._ > import org.apache.hudi.DataSourceWriteOptions._ > import org.apache.hudi.config.HoodieWriteConfig._ > val tableName = "hudi_trips_cow" > val basePath = "file:///tmp/hudi_trips_cow" > val dataGen = new DataGenerator > // insert > val inserts = convertToStringList(dataGen.generateInserts(10)) > val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2)) > df.write.format("hudi"). > options(getQuickstartWriteConfigs). > option(PRECOMBINE_FIELD_OPT_KEY, "ts"). > option(RECORDKEY_FIELD_OPT_KEY, "uuid"). > option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath"). > option(TABLE_NAME, tableName). > mode(Overwrite). > save(basePath) > // create view > val tripsSnapshotDF = spark. > read. > format("hudi"). > load(basePath + "/*/*/*/*") > tripsSnapshotDF.createOrReplaceTempView("hudi_trips_snapshot") > // delete > spark. > read. > format("hudi"). > load(basePath + "/*/*/*/*"). 
> createOrReplaceTempView("hudi_trips_snapshot") > spark.sql("select uuid, partitionpath from hudi_trips_snapshot").count() > // fetch two records to be deleted > val ds = spark.sql("select uuid, partitionpath from > hudi_trips_snapshot").limit(2) > // issue deletes > val deletes = dataGen.generateDeletes(ds.collectAsList()){code} > The generateDeletes call then fails with this exception: > {code:java} > java.lang.IllegalArgumentException: Field "partitionPath" does not exist. > Available fields: uuid, partitionpath > at > org.apache.spark.sql.types.StructType$$anonfun$fieldIndex$1.apply(StructType.scala:303) > at > org.apache.spark.sql.types.StructType$$anonfun$fieldIndex$1.apply(StructType.scala:303) > at scala.collection.MapLike$class.getOrElse(MapLike.scala:128) > at scala.collection.AbstractMap.getOrElse(Map.scala:59) > at org.apache.spark.sql.types.StructType.fieldIndex(StructType.scala:302) > at > org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema.fieldIndex(rows.scala:187) > at org.apache.spark.sql.Row$class.getAs(Row.scala:333) > at > org.apache.spark.sql.catalyst.expressions.GenericRow.getAs(rows.scala:166) > at > org.apache.hudi.QuickstartUtils$DataGenerator.lambda$generateDeletes$1(QuickstartUtils.java:182) > at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193) > at > java.util.Spliterators$ArraySpliterator.forEachRemaining(Spliterators.java:948) > at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481) > at > java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471) > at > java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708) > at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234) > at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499) > at > org.apache.hudi.QuickstartUtils$DataGenerator.generateDeletes(QuickstartUtils.java:183) > ... 61 elided > {code} > -- This message was sent by Atlassian Jira (v8.3.4#803005)