[ https://issues.apache.org/jira/browse/SPARK-11447?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14985569#comment-14985569 ]
kevin yu commented on SPARK-11447: ---------------------------------- Hello Kapil: When you say it doesn't work, do you mean that you got an exception? I tried Spark 1.5, and this Scala code works for me. Can you verify which Spark version you are running? I saw you put 1.5.1 there. //DOESN'T WORK val filteredDF = df.filter(df("column") <=> (new Column(Literal(null)))) I ran this in my Spark shell on the latest version: scala> val filteredDF = df.filter(df("column") <=> (new Column(Literal(null)))) filteredDF: org.apache.spark.sql.DataFrame = [column: string] Thanks. > Null comparison requires type information but type extraction fails for > complex types > ------------------------------------------------------------------------------------- > > Key: SPARK-11447 > URL: https://issues.apache.org/jira/browse/SPARK-11447 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 1.5.1 > Reporter: Kapil Singh > > While comparing a Column to a null literal, comparison works only if type of > null literal matches type of the Column it's being compared to. Example scala > code (can be run from spark shell): > import org.apache.spark.sql._ > import org.apache.spark.sql.types._ > import org.apache.spark.sql.catalyst.expressions._ > val inputRowsData = Seq(Seq("abc"),Seq(null),Seq("xyz")) > val inputRows = for(seq <- inputRowsData) yield Row.fromSeq(seq) > val dfSchema = StructType(Seq(StructField("column", StringType, true))) > val df = sqlContext.createDataFrame(sc.makeRDD(inputRows), dfSchema) > //DOESN'T WORK > val filteredDF = df.filter(df("column") <=> (new Column(Literal(null)))) > //WORKS > val filteredDF = df.filter(df("column") <=> (new Column(Literal.create(null, > SparkleFunctions.dataType(df("column")))))) > Why should type information be required for a null comparison? If it's > required, it's not always possible to extract type information from complex > types (e.g. StructType). 
Following scala code (can be run from spark shell), > throws org.apache.spark.sql.catalyst.analysis.UnresolvedException: > import org.apache.spark.sql._ > import org.apache.spark.sql.types._ > import org.apache.spark.sql.catalyst.expressions._ > val inputRowsData = Seq(Seq(Row.fromSeq(Seq("abc", > "def"))),Seq(Row.fromSeq(Seq(null, "123"))),Seq(Row.fromSeq(Seq("ghi", > "jkl")))) > val inputRows = for(seq <- inputRowsData) yield Row.fromSeq(seq) > val dfSchema = StructType(Seq(StructField("column", > StructType(Seq(StructField("p1", StringType, true), StructField("p2", > StringType, true))), true))) > val filteredDF = df.filter(df("column")("p1") <=> (new > Column(Literal.create(null, SparkleFunctions.dataType(df("column")("p1")))))) > org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to > dataType on unresolved object, tree: column#0[p1] > at > org.apache.spark.sql.catalyst.analysis.UnresolvedExtractValue.dataType(unresolved.scala:243) > at > org.apache.spark.sql.ArithmeticFunctions$class.dataType(ArithmeticFunctions.scala:76) > at > org.apache.spark.sql.SparkleFunctions$.dataType(SparkleFunctions.scala:14) > at > $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:38) > at > $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:43) > at > $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:45) > at > $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:47) > at > $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:49) > at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:51) > at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:53) > at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:55) > at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:57) > at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:59) > at $iwC$$iwC$$iwC$$iwC.<init>(<console>:61) > at $iwC$$iwC$$iwC.<init>(<console>:63) > at 
$iwC$$iwC.<init>(<console>:65) > at $iwC.<init>(<console>:67) > at <init>(<console>:69) > at .<init>(<console>:73) > at .<clinit>(<console>) > at .<init>(<console>:7) > at .<clinit>(<console>) > at $print(<console>) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at > org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065) > at > org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1340) > at > org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840) > at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871) > at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819) > at > org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857) > at > org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902) > at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814) > at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657) > at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665) > at > org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670) > at > org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997) > at > org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945) > at > org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945) > at > scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135) > at > org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945) > at 
org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059) > at org.apache.spark.repl.Main$.main(Main.scala:31) > at org.apache.spark.repl.Main.main(Main.scala) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at > org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:672) > at > org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180) > at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205) > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120) > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org