[ https://issues.apache.org/jira/browse/HUDI-2514?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
董可伦 updated HUDI-2514: ---------------------- Attachment: t3.png > Add default hiveTableSerdeProperties for Spark SQL when sync Hive > ----------------------------------------------------------------- > > Key: HUDI-2514 > URL: https://issues.apache.org/jira/browse/HUDI-2514 > Project: Apache Hudi > Issue Type: Improvement > Components: hive-sync, spark-sql > Reporter: 董可伦 > Assignee: 董可伦 > Priority: Critical > Labels: hudi-on-call, pull-request-available > Fix For: 0.11.0 > > Attachments: t1.png, t2.png, t3.png > > Original Estimate: 0.25h > Remaining Estimate: 0.25h > > If do not add the default hiveTableSerdeProperties,Spark SQL will not work > properly > For example,update: > > {code:java} > update hudi.test_hudi_table set price=333 where id=111; > {code} > > It will throw an Exception: > {code:java} > 21/10/03 17:41:15 ERROR SparkSQLDriver: Failed in [update > hudi.test_hudi_table set price=333 where id=111] > java.lang.AssertionError: assertion failed: There are no primary key in table > `hudi`.`test_hudi_table`, cannot execute update operator > at scala.Predef$.assert(Predef.scala:170) > at > org.apache.spark.sql.hudi.command.UpdateHoodieTableCommand.buildHoodieConfig(UpdateHoodieTableCommand.scala:91) > at > org.apache.spark.sql.hudi.command.UpdateHoodieTableCommand.run(UpdateHoodieTableCommand.scala:73) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79) > at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194) > at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194) > at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370) > at > org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80) > at > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75) > at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369) > at org.apache.spark.sql.Dataset.<init>(Dataset.scala:194) > at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:79) > at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642) > at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:694) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:62) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:371) > at > org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:274) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) > at > org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845) > at > org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161) > at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184) > at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86) > at > org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920) > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929) > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > java.lang.AssertionError: assertion failed: There are no primary key in table > `hudi`.`test_hudi_table`, cannot execute update operator > at scala.Predef$.assert(Predef.scala:170) > at > org.apache.spark.sql.hudi.command.UpdateHoodieTableCommand.buildHoodieConfig(UpdateHoodieTableCommand.scala:91) > at > org.apache.spark.sql.hudi.command.UpdateHoodieTableCommand.run(UpdateHoodieTableCommand.scala:73) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79) > at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194) > at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194) > at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370) > at > org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80) > at > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75) > at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369) > at org.apache.spark.sql.Dataset.<init>(Dataset.scala:194) > at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:79) > at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642) > at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:694) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:62) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:371) > at > org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:274) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) > at > org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845) > at > org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161) > at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184) > at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86) > at > org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920) > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929) > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > {code} > > update:2022/1/19 > > ```bash > hive > create database test_hudi; > ``` > ```scsala > spark-shell --master yarn --deploy-mode client --executor-memory 2G > --num-executors 3 --executor-cores 2 --driver-memory 4G --driver-cores 2 > --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' --conf > 'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension' > --principal .. --keytab .. > import org.apache.hudi.DataSourceWriteOptions._ > import org.apache.hudi.QuickstartUtils.\{DataGenerator, convertToStringList, > getQuickstartWriteConfigs} > import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME > import org.apache.spark.sql.SaveMode._ > import org.apache.spark.sql.\{SaveMode, SparkSession} > import org.apache.spark.sql.functions.lit > import org.apache.hudi.DataSourceReadOptions._ > import org.apache.hudi.config.HoodieWriteConfig > import org.apache.hudi.keygen.SimpleKeyGenerator > import org.apache.hudi.common.model.\{DefaultHoodieRecordPayload, > HoodiePayloadProps} > import org.apache.hudi.io.HoodieMergeHandle > import org.apache.hudi.common.table.HoodieTableConfig > import org.apache.spark.sql.functions._ > import spark.implicits._ > val df = Seq((1, "a1", 10, 1000, "2022-01-19")).toDF("id", "name", "value", > "ts", "dt") > df.write.format("hudi"). > option(HoodieWriteConfig.TBL_NAME.key, "test_hudi_table_sync_hive"). > option(TABLE_TYPE.key, COW_TABLE_TYPE_OPT_VAL). > option(RECORDKEY_FIELD.key, "id"). > option(PRECOMBINE_FIELD.key, "ts"). > option(KEYGENERATOR_CLASS_OPT_KEY, > "org.apache.hudi.keygen.NonpartitionedKeyGenerator"). > option("hoodie.datasource.write.partitionpath.field", ""). > option("hoodie.metadata.enable", false). > option(KEYGENERATOR_CLASS_OPT_KEY, > "org.apache.hudi.keygen.ComplexKeyGenerator"). > option(META_SYNC_ENABLED.key(), true). > option(HIVE_USE_JDBC.key(), false). > option(HIVE_DATABASE.key(), "test_hudi"). > option(HIVE_AUTO_CREATE_DATABASE.key(), true). > option(HIVE_TABLE.key(), "test_hudi_table_sync_hive"). > option(HIVE_PARTITION_EXTRACTOR_CLASS.key(), > "org.apache.hudi.hive.MultiPartKeysValueExtractor"). > mode("overwrite"). > save("/test_hudi/test_hudi_table_sync_hive") > update test_hudi.test_hudi_table_sync_hive set name='a2' where id=1; > ``` > hive > ```sql > show create table test_hudi_table_sync_hive; > +----------------------------------------------------+ > | createtab_stmt | > +----------------------------------------------------+ > | CREATE EXTERNAL TABLE `test_hudi_table_sync_hive`( | > | `_hoodie_commit_time` string, | > | `_hoodie_commit_seqno` string, | > | `_hoodie_record_key` string, | > | `_hoodie_partition_path` string, | > | `_hoodie_file_name` string, | > | `id` int, | > | `name` string, | > | `value` int, | > | `ts` int, | > | `dt` string) | > | ROW FORMAT SERDE | > | 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' | > | WITH SERDEPROPERTIES ( | > | 'hoodie.query.as.ro.table'='false', | > | 'path'='/test_hudi/test_hudi_table_sync_hive') | > | STORED AS INPUTFORMAT | > | 'org.apache.hudi.hadoop.HoodieParquetInputFormat' | > | OUTPUTFORMAT | > | 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' | > | LOCATION | > | 'hdfs://cluster1/test_hudi/test_hudi_table_sync_hive' | > | TBLPROPERTIES ( | > | 'last_commit_time_sync'='20220119110215185', | > | 'spark.sql.sources.provider'='hudi', | > | 'spark.sql.sources.schema.numParts'='1', | > | > 'spark.sql.sources.schema.part.0'='\{"type":"struct","fields":[{"name":"_hoodie_commit_time","type":"string","nullable":true,"metadata":{}},\{"name":"_hoodie_commit_seqno","type":"string","nullable":true,"metadata":{}},\{"name":"_hoodie_record_key","type":"string","nullable":true,"metadata":{}},\{"name":"_hoodie_partition_path","type":"string","nullable":true,"metadata":{}},\{"name":"_hoodie_file_name","type":"string","nullable":true,"metadata":{}},\{"name":"id","type":"integer","nullable":false,"metadata":{}},\{"name":"name","type":"string","nullable":true,"metadata":{}},\{"name":"value","type":"integer","nullable":false,"metadata":{}},\{"name":"ts","type":"integer","nullable":false,"metadata":{}},\{"name":"dt","type":"string","nullable":true,"metadata":{}}]}', > | > | 'transient_lastDdlTime'='1642561355') | > +----------------------------------------------------+ > 28 rows selected (0.429 seconds) > ``` > ```sql > spark-sql --master yarn --deploy-mode client --conf > 'spark.serializer=org.apache.spark.serializer.KryoSerializer' --conf > 'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension' > --principal .. --keytab .. > update test_hudi.test_hudi_table_sync_hive set name='a2' where id=1; > ``` > exception: > ``` > 22/01/19 11:04:50 ERROR SparkSQLDriver: Failed in [update > test_hudi.test_hudi_table_sync_hive set name='a2' where id=1] > java.lang.NullPointerException > at java.util.Hashtable.put(Hashtable.java:460) > at java.util.Hashtable.putAll(Hashtable.java:524) > at > org.apache.hudi.HoodieWriterUtils$.parametersWithWriteDefaults(HoodieWriterUtils.scala:52) > at > org.apache.hudi.HoodieSparkSqlWriter$.mergeParamsAndGetHoodieConfig(HoodieSparkSqlWriter.scala:722) > at > org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:87) > at > org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:164) > at > org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) > at > org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) > at > org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:83) > at > org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:81) > at > org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676) > at > org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676) > at > org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80) > at > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75) > at > org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:676) > at > org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:285) > at > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:271) > at > org.apache.spark.sql.hudi.command.UpdateHoodieTableCommand.run(UpdateHoodieTableCommand.scala:80) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79) > at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194) > at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194) > at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370) > at > org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80) > at > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75) > at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369) > at org.apache.spark.sql.Dataset.<init>(Dataset.scala:194) > at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:79) > at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642) > at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:694) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:62) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:371) > at > org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:274) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) > at > org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845) > at > org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161) > at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184) > at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86) > at > org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920) > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929) > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > java.lang.NullPointerException > at java.util.Hashtable.put(Hashtable.java:460) > at java.util.Hashtable.putAll(Hashtable.java:524) > at > org.apache.hudi.HoodieWriterUtils$.parametersWithWriteDefaults(HoodieWriterUtils.scala:52) > at > org.apache.hudi.HoodieSparkSqlWriter$.mergeParamsAndGetHoodieConfig(HoodieSparkSqlWriter.scala:722) > at > org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:87) > at > org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:164) > at > org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) > at > org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) > at > org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:83) > at > org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:81) > at > org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676) > at > org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676) > at > org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80) > at > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75) > at > org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:676) > at > org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:285) > at > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:271) > at > org.apache.spark.sql.hudi.command.UpdateHoodieTableCommand.run(UpdateHoodieTableCommand.scala:80) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79) > at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194) > at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194) > at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370) > at > org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80) > at > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75) > at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369) > at org.apache.spark.sql.Dataset.<init>(Dataset.scala:194) > at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:79) > at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642) > at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:694) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:62) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:371) > at > org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:274) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) > at > org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845) > at > org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161) > at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184) > at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86) > at > org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920) > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929) > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > ``` -- This message was sent by Atlassian Jira (v8.20.1#820001)