Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/20018#discussion_r158210374 --- Diff: examples/src/main/scala/org/apache/spark/examples/sql/hive/SparkHiveExample.scala --- @@ -102,8 +101,63 @@ object SparkHiveExample { // | 4| val_4| 4| val_4| // | 5| val_5| 5| val_5| // ... - // $example off:spark_hive$ + /* + * Save the DataFrame to a Hive managed table in Parquet format. + * 1. Create a Hive database/schema with a location on HDFS if you want to specify it explicitly; + * otherwise the default warehouse location will be used to store the Hive table data. + * Ex: CREATE DATABASE IF NOT EXISTS database_name LOCATION hdfs_path; + * You don't have to explicitly give a location for each table; every table under the specified schema + * will be located at the location given when the schema was created. + * 2. Create a Hive managed table with the storage format 'Parquet'. + * Ex: CREATE TABLE records(key int, value string) STORED AS PARQUET; + */ + val hiveTableDF = sql("SELECT * FROM records").toDF() --- End diff -- `.toDF` is not needed
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org