[ https://issues.apache.org/jira/browse/SPARK-13588?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Akshat Thakar updated SPARK-13588: ---------------------------------- Description: I am trying to map existing Parquet file with external table using Pyspark script. I am using Hive Context to execute Hive SQL. I was able to execute same SQL command using Hive shell. I get below error while doing so- >>> hive.sql('create external table temp_inserts like new_inserts stored as >>> parquet LOCATION "/user/hive/warehouse/temp_inserts') 16/03/01 06:24:01 INFO ParseDriver: Parsing command: create external table temp_inserts like new_inserts stored as parquet LOCATION "/user/hive/warehouse/temp_inserts Traceback (most recent call last): File "<stdin>", line 1, in <module> File "/usr/hdp/2.3.0.0-2557/spark/python/pyspark/sql/context.py", line 488, in sql return DataFrame(self._ssql_ctx.sql(sqlQuery), self) File "/usr/hdp/2.3.0.0-2557/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py", line 538, in __call__ File "/usr/hdp/2.3.0.0-2557/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py", line 300, in get_return_value py4j.protocol.Py4JJavaError: An error occurred while calling o36.sql. 
: org.apache.spark.sql.AnalysisException: missing EOF at 'stored' near 'new_inserts'; line 1 pos 52 at org.apache.spark.sql.hive.HiveQl$.createPlan(HiveQl.scala:254) at org.apache.spark.sql.hive.ExtendedHiveQlParser$$anonfun$hiveQl$1.apply(ExtendedHiveQlParser.scala:41) at org.apache.spark.sql.hive.ExtendedHiveQlParser$$anonfun$hiveQl$1.apply(ExtendedHiveQlParser.scala:40) at scala.util.parsing.combinator.Parsers$Success.map(Parsers.scala:136) at scala.util.parsing.combinator.Parsers$Success.map(Parsers.scala:135) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$map$1.apply(Parsers.scala:242) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$map$1.apply(Parsers.scala:242) at scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1$$anonfun$apply$2.apply(Parsers.scala:254) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1$$anonfun$apply$2.apply(Parsers.scala:254) at scala.util.parsing.combinator.Parsers$Failure.append(Parsers.scala:202) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254) at scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222) at scala.util.parsing.combinator.Parsers$$anon$2$$anonfun$apply$14.apply(Parsers.scala:891) at scala.util.parsing.combinator.Parsers$$anon$2$$anonfun$apply$14.apply(Parsers.scala:891) at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57) at scala.util.parsing.combinator.Parsers$$anon$2.apply(Parsers.scala:890) at scala.util.parsing.combinator.PackratParsers$$anon$1.apply(PackratParsers.scala:110) at org.apache.spark.sql.catalyst.AbstractSparkSQLParser.apply(AbstractSparkSQLParser.scala:38) at org.apache.spark.sql.hive.HiveQl$$anonfun$3.apply(HiveQl.scala:138) at org.apache.spark.sql.hive.HiveQl$$anonfun$3.apply(HiveQl.scala:138) at 
org.apache.spark.sql.SparkSQLParser$$anonfun$org$apache$spark$sql$SparkSQLParser$$others$1.apply(SparkSQLParser.scala:96) at org.apache.spark.sql.SparkSQLParser$$anonfun$org$apache$spark$sql$SparkSQLParser$$others$1.apply(SparkSQLParser.scala:95) at scala.util.parsing.combinator.Parsers$Success.map(Parsers.scala:136) at scala.util.parsing.combinator.Parsers$Success.map(Parsers.scala:135) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$map$1.apply(Parsers.scala:242) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$map$1.apply(Parsers.scala:242) at scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1$$anonfun$apply$2.apply(Parsers.scala:254) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1$$anonfun$apply$2.apply(Parsers.scala:254) at scala.util.parsing.combinator.Parsers$Failure.append(Parsers.scala:202) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254) at scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222) at scala.util.parsing.combinator.Parsers$$anon$2$$anonfun$apply$14.apply(Parsers.scala:891) at scala.util.parsing.combinator.Parsers$$anon$2$$anonfun$apply$14.apply(Parsers.scala:891) at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57) at scala.util.parsing.combinator.Parsers$$anon$2.apply(Parsers.scala:890) at scala.util.parsing.combinator.PackratParsers$$anon$1.apply(PackratParsers.scala:110) at org.apache.spark.sql.catalyst.AbstractSparkSQLParser.apply(AbstractSparkSQLParser.scala:38) at org.apache.spark.sql.hive.HiveQl$.parseSql(HiveQl.scala:234) at org.apache.spark.sql.hive.HiveContext$$anonfun$sql$1.apply(HiveContext.scala:103) at org.apache.spark.sql.hive.HiveContext$$anonfun$sql$1.apply(HiveContext.scala:103) at scala.Option.getOrElse(Option.scala:120) at 
org.apache.spark.sql.hive.HiveContext.sql(HiveContext.scala:103) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379) at py4j.Gateway.invoke(Gateway.java:259) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.GatewayConnection.run(GatewayConnection.java:207) at java.lang.Thread.run(Thread.java:745) was: I am trying to map existing Parquet file with external table using Pyspark script. I am using Hive Context to execute Hive SQL. I was able to execute same SQL command using Hive shell. I get below error while doing so- >>> hive.sql('create external table temp_inserts like new_inserts stored as >>> parquet LOCATION "/user/hive/warehouse/temp_inserts') 16/03/01 06:16:35 INFO ParseDriver: Parsing command: create external table cdc_new like cdc_new stored as parquet LOCATION "/user/hive/warehouse/cdc_temp_inserts Traceback (most recent call last): File "<stdin>", line 1, in <module> File "/usr/hdp/2.3.0.0-2557/spark/python/pyspark/sql/context.py", line 488, in sql return DataFrame(self._ssql_ctx.sql(sqlQuery), self) File "/usr/hdp/2.3.0.0-2557/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py", line 538, in __call__ File "/usr/hdp/2.3.0.0-2557/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py", line 300, in get_return_value py4j.protocol.Py4JJavaError: An error occurred while calling o36.sql. 
: org.apache.spark.sql.AnalysisException: missing EOF at 'stored' near 'temp_inserts'; line 1 pos 44 at org.apache.spark.sql.hive.HiveQl$.createPlan(HiveQl.scala:254) at org.apache.spark.sql.hive.ExtendedHiveQlParser$$anonfun$hiveQl$1.apply(ExtendedHiveQlParser.scala:41) at org.apache.spark.sql.hive.ExtendedHiveQlParser$$anonfun$hiveQl$1.apply(ExtendedHiveQlParser.scala:40) at scala.util.parsing.combinator.Parsers$Success.map(Parsers.scala:136) at scala.util.parsing.combinator.Parsers$Success.map(Parsers.scala:135) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$map$1.apply(Parsers.scala:242) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$map$1.apply(Parsers.scala:242) at scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1$$anonfun$apply$2.apply(Parsers.scala:254) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1$$anonfun$apply$2.apply(Parsers.scala:254) at scala.util.parsing.combinator.Parsers$Failure.append(Parsers.scala:202) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254) at scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222) at scala.util.parsing.combinator.Parsers$$anon$2$$anonfun$apply$14.apply(Parsers.scala:891) at scala.util.parsing.combinator.Parsers$$anon$2$$anonfun$apply$14.apply(Parsers.scala:891) at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57) at scala.util.parsing.combinator.Parsers$$anon$2.apply(Parsers.scala:890) at scala.util.parsing.combinator.PackratParsers$$anon$1.apply(PackratParsers.scala:110) at org.apache.spark.sql.catalyst.AbstractSparkSQLParser.apply(AbstractSparkSQLParser.scala:38) at org.apache.spark.sql.hive.HiveQl$$anonfun$3.apply(HiveQl.scala:138) at org.apache.spark.sql.hive.HiveQl$$anonfun$3.apply(HiveQl.scala:138) at 
org.apache.spark.sql.SparkSQLParser$$anonfun$org$apache$spark$sql$SparkSQLParser$$others$1.apply(SparkSQLParser.scala:96) at org.apache.spark.sql.SparkSQLParser$$anonfun$org$apache$spark$sql$SparkSQLParser$$others$1.apply(SparkSQLParser.scala:95) at scala.util.parsing.combinator.Parsers$Success.map(Parsers.scala:136) at scala.util.parsing.combinator.Parsers$Success.map(Parsers.scala:135) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$map$1.apply(Parsers.scala:242) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$map$1.apply(Parsers.scala:242) at scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1$$anonfun$apply$2.apply(Parsers.scala:254) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1$$anonfun$apply$2.apply(Parsers.scala:254) at scala.util.parsing.combinator.Parsers$Failure.append(Parsers.scala:202) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254) at scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254) at scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222) at scala.util.parsing.combinator.Parsers$$anon$2$$anonfun$apply$14.apply(Parsers.scala:891) at scala.util.parsing.combinator.Parsers$$anon$2$$anonfun$apply$14.apply(Parsers.scala:891) at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57) at scala.util.parsing.combinator.Parsers$$anon$2.apply(Parsers.scala:890) at scala.util.parsing.combinator.PackratParsers$$anon$1.apply(PackratParsers.scala:110) at org.apache.spark.sql.catalyst.AbstractSparkSQLParser.apply(AbstractSparkSQLParser.scala:38) at org.apache.spark.sql.hive.HiveQl$.parseSql(HiveQl.scala:234) at org.apache.spark.sql.hive.HiveContext$$anonfun$sql$1.apply(HiveContext.scala:103) at org.apache.spark.sql.hive.HiveContext$$anonfun$sql$1.apply(HiveContext.scala:103) at scala.Option.getOrElse(Option.scala:120) at 
org.apache.spark.sql.hive.HiveContext.sql(HiveContext.scala:103) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379) at py4j.Gateway.invoke(Gateway.java:259) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.GatewayConnection.run(GatewayConnection.java:207) at java.lang.Thread.run(Thread.java:745) > Unable to Map Parquet file to Hive Table using HiveContext > ---------------------------------------------------------- > > Key: SPARK-13588 > URL: https://issues.apache.org/jira/browse/SPARK-13588 > Project: Spark > Issue Type: Bug > Components: PySpark > Affects Versions: 1.3.1 > Environment: Linux Red Hat 6.7, Hortonworks HDP 2.3 Distribution > Reporter: Akshat Thakar > Priority: Minor > > I am trying to map existing Parquet file with external table using Pyspark > script. I am using Hive Context to execute Hive SQL. > I was able to execute same SQL command using Hive shell. 
> I get below error while doing so- > >>> hive.sql('create external table temp_inserts like new_inserts stored as > >>> parquet LOCATION "/user/hive/warehouse/temp_inserts') > 16/03/01 06:24:01 INFO ParseDriver: Parsing command: create external table > temp_inserts like new_inserts stored as parquet LOCATION > "/user/hive/warehouse/temp_inserts > Traceback (most recent call last): > File "<stdin>", line 1, in <module> > File "/usr/hdp/2.3.0.0-2557/spark/python/pyspark/sql/context.py", line 488, > in sql > return DataFrame(self._ssql_ctx.sql(sqlQuery), self) > File > "/usr/hdp/2.3.0.0-2557/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py", > line 538, in __call__ > File > "/usr/hdp/2.3.0.0-2557/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py", > line 300, in get_return_value > py4j.protocol.Py4JJavaError: An error occurred while calling o36.sql. > : org.apache.spark.sql.AnalysisException: missing EOF at 'stored' near > 'new_inserts'; line 1 pos 52 > at org.apache.spark.sql.hive.HiveQl$.createPlan(HiveQl.scala:254) > at > org.apache.spark.sql.hive.ExtendedHiveQlParser$$anonfun$hiveQl$1.apply(ExtendedHiveQlParser.scala:41) > at > org.apache.spark.sql.hive.ExtendedHiveQlParser$$anonfun$hiveQl$1.apply(ExtendedHiveQlParser.scala:40) > at > scala.util.parsing.combinator.Parsers$Success.map(Parsers.scala:136) > at > scala.util.parsing.combinator.Parsers$Success.map(Parsers.scala:135) > at > scala.util.parsing.combinator.Parsers$Parser$$anonfun$map$1.apply(Parsers.scala:242) > at > scala.util.parsing.combinator.Parsers$Parser$$anonfun$map$1.apply(Parsers.scala:242) > at > scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222) > at > scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1$$anonfun$apply$2.apply(Parsers.scala:254) > at > scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1$$anonfun$apply$2.apply(Parsers.scala:254) > at > scala.util.parsing.combinator.Parsers$Failure.append(Parsers.scala:202) > at > 
scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254) > at > scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254) > at > scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222) > at > scala.util.parsing.combinator.Parsers$$anon$2$$anonfun$apply$14.apply(Parsers.scala:891) > at > scala.util.parsing.combinator.Parsers$$anon$2$$anonfun$apply$14.apply(Parsers.scala:891) > at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57) > at > scala.util.parsing.combinator.Parsers$$anon$2.apply(Parsers.scala:890) > at > scala.util.parsing.combinator.PackratParsers$$anon$1.apply(PackratParsers.scala:110) > at > org.apache.spark.sql.catalyst.AbstractSparkSQLParser.apply(AbstractSparkSQLParser.scala:38) > at org.apache.spark.sql.hive.HiveQl$$anonfun$3.apply(HiveQl.scala:138) > at org.apache.spark.sql.hive.HiveQl$$anonfun$3.apply(HiveQl.scala:138) > at > org.apache.spark.sql.SparkSQLParser$$anonfun$org$apache$spark$sql$SparkSQLParser$$others$1.apply(SparkSQLParser.scala:96) > at > org.apache.spark.sql.SparkSQLParser$$anonfun$org$apache$spark$sql$SparkSQLParser$$others$1.apply(SparkSQLParser.scala:95) > at > scala.util.parsing.combinator.Parsers$Success.map(Parsers.scala:136) > at > scala.util.parsing.combinator.Parsers$Success.map(Parsers.scala:135) > at > scala.util.parsing.combinator.Parsers$Parser$$anonfun$map$1.apply(Parsers.scala:242) > at > scala.util.parsing.combinator.Parsers$Parser$$anonfun$map$1.apply(Parsers.scala:242) > at > scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222) > at > scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1$$anonfun$apply$2.apply(Parsers.scala:254) > at > scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1$$anonfun$apply$2.apply(Parsers.scala:254) > at > scala.util.parsing.combinator.Parsers$Failure.append(Parsers.scala:202) > at > 
scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254) > at > scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254) > at > scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222) > at > scala.util.parsing.combinator.Parsers$$anon$2$$anonfun$apply$14.apply(Parsers.scala:891) > at > scala.util.parsing.combinator.Parsers$$anon$2$$anonfun$apply$14.apply(Parsers.scala:891) > at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57) > at > scala.util.parsing.combinator.Parsers$$anon$2.apply(Parsers.scala:890) > at > scala.util.parsing.combinator.PackratParsers$$anon$1.apply(PackratParsers.scala:110) > at > org.apache.spark.sql.catalyst.AbstractSparkSQLParser.apply(AbstractSparkSQLParser.scala:38) > at org.apache.spark.sql.hive.HiveQl$.parseSql(HiveQl.scala:234) > at > org.apache.spark.sql.hive.HiveContext$$anonfun$sql$1.apply(HiveContext.scala:103) > at > org.apache.spark.sql.hive.HiveContext$$anonfun$sql$1.apply(HiveContext.scala:103) > at scala.Option.getOrElse(Option.scala:120) > at org.apache.spark.sql.hive.HiveContext.sql(HiveContext.scala:103) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231) > at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379) > at py4j.Gateway.invoke(Gateway.java:259) > at > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133) > at py4j.commands.CallCommand.execute(CallCommand.java:79) > at py4j.GatewayConnection.run(GatewayConnection.java:207) > at java.lang.Thread.run(Thread.java:745) -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To 
unsubscribe, e-mail: issues-unsubscr...@spark.apache.org. For additional commands, e-mail: issues-h...@spark.apache.org