[
https://issues.apache.org/jira/browse/SPARK-6652?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Xiao Li resolved SPARK-6652.
----------------------------
Resolution: Fixed
> SQLContext and HiveContext do not handle "tricky" names well
> ------------------------------------------------------------
>
> Key: SPARK-6652
> URL: https://issues.apache.org/jira/browse/SPARK-6652
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 1.2.1
> Reporter: Max Seiden
>
> h3. Summary
> There are cases where both the SQLContext and HiveContext fail to handle
> "tricky" names (containing UTF-8 characters, tabs, newlines, etc.) well. For
> example, the following string:
> {noformat}
> val tricky = "Tricky-\u4E2D[x.][\",/\\n * ? é\n&$(x)\t(':;#!^-Name"
> {noformat}
> causes the following exceptions during parsing and resolution (respectively).
> h5. SQLContext parse failure
> {noformat}
> // pseudocode
> val data = 0 until 100
> val rdd = sc.parallelize(data)
> val schema = StructType(StructField(tricky, IntegerType, false) :: Nil)
> val schemaRDD = sqlContext.applySchema(rdd.map(i => Row(i)), schema)
> schemaRDD.registerAsTable(tricky)
> sqlContext.sql(s"select `$tricky` from `$tricky`")
> java.lang.RuntimeException: [1.33] failure: ``UNION'' expected but
> ErrorToken(``' expected but
> found) found
> select `Tricky-中[x.][",/\n * ? é
> ^
> at scala.sys.package$.error(package.scala:27)
> at
> org.apache.spark.sql.catalyst.AbstractSparkSQLParser.apply(SparkSQLParser.scala:33)
> at org.apache.spark.sql.SQLContext$$anonfun$1.apply(SQLContext.scala:79)
> at org.apache.spark.sql.SQLContext$$anonfun$1.apply(SQLContext.scala:79)
> at
> org.apache.spark.sql.catalyst.SparkSQLParser$$anonfun$org$apache$spark$sql$catalyst$SparkSQLParser$$others$1.apply(SparkSQLParser.scala:174)
> at
> org.apache.spark.sql.catalyst.SparkSQLParser$$anonfun$org$apache$spark$sql$catalyst$SparkSQLParser$$others$1.apply(SparkSQLParser.scala:173)
> at scala.util.parsing.combinator.Parsers$Success.map(Parsers.scala:136)
> at scala.util.parsing.combinator.Parsers$Success.map(Parsers.scala:135)
> at
> scala.util.parsing.combinator.Parsers$Parser$$anonfun$map$1.apply(Parsers.scala:242)
> at
> scala.util.parsing.combinator.Parsers$Parser$$anonfun$map$1.apply(Parsers.scala:242)
> at
> scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222)
> at
> scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1$$anonfun$apply$2.apply(Parsers.scala:254)
> at
> scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1$$anonfun$apply$2.apply(Parsers.scala:254)
> at
> scala.util.parsing.combinator.Parsers$Failure.append(Parsers.scala:202)
> at
> scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254)
> at
> scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254)
> at
> scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222)
> at
> scala.util.parsing.combinator.Parsers$$anon$2$$anonfun$apply$14.apply(Parsers.scala:891)
> at
> scala.util.parsing.combinator.Parsers$$anon$2$$anonfun$apply$14.apply(Parsers.scala:891)
> at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57)
> at
> scala.util.parsing.combinator.Parsers$$anon$2.apply(Parsers.scala:890)
> at
> scala.util.parsing.combinator.PackratParsers$$anon$1.apply(PackratParsers.scala:110)
> at
> org.apache.spark.sql.catalyst.AbstractSparkSQLParser.apply(SparkSQLParser.scala:31)
> at
> org.apache.spark.sql.SQLContext$$anonfun$parseSql$1.apply(SQLContext.scala:83)
> at
> org.apache.spark.sql.SQLContext$$anonfun$parseSql$1.apply(SQLContext.scala:83)
> at scala.Option.getOrElse(Option.scala:120)
> at org.apache.spark.sql.SQLContext.parseSql(SQLContext.scala:83)
> at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:303)
> {noformat}
> h5. HiveContext resolution failure
> {noformat}
> // pseudocode
> val data = 0 until 100
> val rdd = sc.parallelize(data)
> val schema = StructType(StructField(tricky, IntegerType, false) :: Nil)
> val schemaRDD = sqlContext.applySchema(rdd.map(i => Row(i)), schema)
> schemaRDD.registerAsTable(tricky)
> sqlContext.sql(s"select `$tricky` from `$tricky`").collect()
> // the parse is ok in this case...
> 15/04/01 10:41:48 WARN HiveConf: DEPRECATED: hive.metastore.ds.retry.* no
> longer has any effect. Use hive.hmshandler.retry.* instead
> 15/04/01 10:41:48 INFO ParseDriver: Parsing command: select
> `Tricky-中[x.][",/\n * ? é
> &$(x) (':;#!^-Name` from `Tricky-中[x.][",/\n * ? é
> &$(x) (':;#!^-Name`
> 15/04/01 10:41:48 INFO ParseDriver: Parse Completed
> // but resolution fails
> org.apache.spark.sql.catalyst.errors.package$TreeNodeException: Unresolved
> attributes: 'Tricky-中[x.][",/\n * ? é
> &$(x) (':;#!^-Name, tree:
> 'Project ['Tricky-中[x.][",/\n * ? é
> &$(x) (':;#!^-Name]
> Subquery tricky-中[x.][",/\n * ? é
> &$(x) (':;#!^-name
> LogicalRDD [Tricky-中[x.][",/\n * ? é
> &$(x) (':;#!^-Name#2], MappedRDD[16] at map at <console>:30
> at
> org.apache.spark.sql.catalyst.analysis.Analyzer$CheckResolution$$anonfun$1.applyOrElse(Analyzer.scala:80)
> at
> org.apache.spark.sql.catalyst.analysis.Analyzer$CheckResolution$$anonfun$1.applyOrElse(Analyzer.scala:78)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:144)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:135)
> at
> org.apache.spark.sql.catalyst.analysis.Analyzer$CheckResolution$.apply(Analyzer.scala:78)
> at
> org.apache.spark.sql.catalyst.analysis.Analyzer$CheckResolution$.apply(Analyzer.scala:76)
> at
> org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:61)
> at
> org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:59)
> at
> scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:51)
> at
> scala.collection.IndexedSeqOptimized$class.foldLeft(IndexedSeqOptimized.scala:60)
> at scala.collection.mutable.WrappedArray.foldLeft(WrappedArray.scala:34)
> at
> org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:59)
> at
> org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:51)
> at scala.collection.immutable.List.foreach(List.scala:318)
> at
> org.apache.spark.sql.catalyst.rules.RuleExecutor.apply(RuleExecutor.scala:51)
> at
> org.apache.spark.sql.SQLContext$QueryExecution.analyzed$lzycompute(SQLContext.scala:411)
> at
> org.apache.spark.sql.SQLContext$QueryExecution.analyzed(SQLContext.scala:411)
> at
> org.apache.spark.sql.SQLContext$QueryExecution.withCachedData$lzycompute(SQLContext.scala:412)
> at
> org.apache.spark.sql.SQLContext$QueryExecution.withCachedData(SQLContext.scala:412)
> at
> org.apache.spark.sql.SQLContext$QueryExecution.optimizedPlan$lzycompute(SQLContext.scala:413)
> at
> org.apache.spark.sql.SQLContext$QueryExecution.optimizedPlan(SQLContext.scala:413)
> at
> org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan$lzycompute(SQLContext.scala:418)
> at
> org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan(SQLContext.scala:416)
> at
> org.apache.spark.sql.SQLContext$QueryExecution.executedPlan$lzycompute(SQLContext.scala:422)
> at
> org.apache.spark.sql.SQLContext$QueryExecution.executedPlan(SQLContext.scala:422)
> at org.apache.spark.sql.SchemaRDD.collect(SchemaRDD.scala:444)
> {noformat}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]