[ https://issues.apache.org/jira/browse/SPARK-1836?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Michael Malak updated SPARK-1836: --------------------------------- Description: Anand Avati partially traced the cause to REPL wrapping classes in $outer classes. There are at least two major symptoms: 1. equals() ========= In REPL equals() (required in custom classes used as a key for groupByKey) seems to have to be written using isInstanceOf[] instead of the canonical match{} Spark Shell (equals uses match{}): {noformat} class C(val s:String) extends Serializable { override def equals(o: Any) = o match { case that: C => that.s == s case _ => false } } val x = new C("a") val bos = new java.io.ByteArrayOutputStream() val out = new java.io.ObjectOutputStream(bos) out.writeObject(x); val b = bos.toByteArray(); out.close bos.close val y = new java.io.ObjectInputStream(new java.io.ByteArrayInputStream(b)).readObject().asInstanceOf[C] x.equals(y) res: Boolean = false {noformat} Spark Shell (equals uses isInstanceOf[]): {noformat} class C(val s:String) extends Serializable { override def equals(o: Any) = if (o.isInstanceOf[C]) (o.asInstanceOf[C].s == s) else false } val x = new C("a") val bos = new java.io.ByteArrayOutputStream() val out = new java.io.ObjectOutputStream(bos) out.writeObject(x); val b = bos.toByteArray(); out.close bos.close val y = new java.io.ObjectInputStream(new java.io.ByteArrayInputStream(b)).readObject().asInstanceOf[C] x.equals(y) res: Boolean = true {noformat} Scala Shell (equals uses match{}): {noformat} class C(val s:String) extends Serializable { override def equals(o: Any) = o match { case that: C => that.s == s case _ => false } } val x = new C("a") val bos = new java.io.ByteArrayOutputStream() val out = new java.io.ObjectOutputStream(bos) out.writeObject(x); val b = bos.toByteArray(); out.close bos.close val y = new java.io.ObjectInputStream(new java.io.ByteArrayInputStream(b)).readObject().asInstanceOf[C] x.equals(y) res: Boolean = true {noformat} 2. 
lookup() ========= {noformat} class C(val s:String) extends Serializable { override def equals(o: Any) = if (o.isInstanceOf[C]) o.asInstanceOf[C].s == s else false override def hashCode = s.hashCode override def toString = s } val r = sc.parallelize(Array((new C("a"),11),(new C("a"),12))) r.lookup(new C("a")) <console>:17: error: type mismatch; found : C required: C r.lookup(new C("a")) ^ {noformat} See http://mail-archives.apache.org/mod_mbox/spark-dev/201405.mbox/%3C1400019424.80629.YahooMailNeo%40web160801.mail.bf1.yahoo.com%3E was: Anand Avati partially traced the cause to REPL wrapping classes in $outer classes. There are at least two major symptoms: 1. equals() ========= In REPL equals() (required in custom classes used as a key for groupByKey) seems to have to be written using isInstanceOf[] instead of the canonical match{} Spark Shell (equals uses match{}): class C(val s:String) extends Serializable { override def equals(o: Any) = o match { case that: C => that.s == s case _ => false } } val x = new C("a") val bos = new java.io.ByteArrayOutputStream() val out = new java.io.ObjectOutputStream(bos) out.writeObject(x); val b = bos.toByteArray(); out.close bos.close val y = new java.io.ObjectInputStream(new java.io.ByteArrayInputStream(b)).readObject().asInstanceOf[C] x.equals(y) res: Boolean = false Spark Shell (equals uses isInstanceOf[]): class C(val s:String) extends Serializable { override def equals(o: Any) = if (o.isInstanceOf[C]) (o.asInstanceOf[C].s == s) else false } val x = new C("a") val bos = new java.io.ByteArrayOutputStream() val out = new java.io.ObjectOutputStream(bos) out.writeObject(x); val b = bos.toByteArray(); out.close bos.close val y = new java.io.ObjectInputStream(new java.io.ByteArrayInputStream(b)).readObject().asInstanceOf[C] x.equals(y) res: Boolean = true Scala Shell (equals uses match{}): class C(val s:String) extends Serializable { override def equals(o: Any) = o match { case that: C => that.s == s case _ => false } } val x = new 
C("a") val bos = new java.io.ByteArrayOutputStream() val out = new java.io.ObjectOutputStream(bos) out.writeObject(x); val b = bos.toByteArray(); out.close bos.close val y = new java.io.ObjectInputStream(new java.io.ByteArrayInputStream(b)).readObject().asInstanceOf[C] x.equals(y) res: Boolean = true 2. lookup() ========= class C(val s:String) extends Serializable { override def equals(o: Any) = if (o.isInstanceOf[C]) o.asInstanceOf[C].s == s else false override def hashCode = s.hashCode override def toString = s } val r = sc.parallelize(Array((new C("a"),11),(new C("a"),12))) r.lookup(new C("a")) <console>:17: error: type mismatch; found : C required: C r.lookup(new C("a")) ^ See http://mail-archives.apache.org/mod_mbox/spark-dev/201405.mbox/%3C1400019424.80629.YahooMailNeo%40web160801.mail.bf1.yahoo.com%3E > REPL $outer type mismatch causes lookup() and equals() problems > --------------------------------------------------------------- > > Key: SPARK-1836 > URL: https://issues.apache.org/jira/browse/SPARK-1836 > Project: Spark > Issue Type: Bug > Affects Versions: 0.9.0 > Reporter: Michael Malak > > Anand Avati partially traced the cause to REPL wrapping classes in $outer > classes. There are at least two major symptoms: > 1. 
equals() > ========= > In REPL equals() (required in custom classes used as a key for groupByKey) > seems to have to be written using isInstanceOf[] instead of the canonical > match{} > Spark Shell (equals uses match{}): > {noformat} > class C(val s:String) extends Serializable { > override def equals(o: Any) = o match { > case that: C => that.s == s > case _ => false > } > } > val x = new C("a") > val bos = new java.io.ByteArrayOutputStream() > val out = new java.io.ObjectOutputStream(bos) > out.writeObject(x); > val b = bos.toByteArray(); > out.close > bos.close > val y = new java.io.ObjectInputStream(new > java.io.ByteArrayInputStream(b)).readObject().asInstanceOf[C] > x.equals(y) > res: Boolean = false > {noformat} > Spark Shell (equals uses isInstanceOf[]): > {noformat} > class C(val s:String) extends Serializable { > override def equals(o: Any) = if (o.isInstanceOf[C]) (o.asInstanceOf[C].s == > s) else false > } > val x = new C("a") > val bos = new java.io.ByteArrayOutputStream() > val out = new java.io.ObjectOutputStream(bos) > out.writeObject(x); > val b = bos.toByteArray(); > out.close > bos.close > val y = new java.io.ObjectInputStream(new > java.io.ByteArrayInputStream(b)).readObject().asInstanceOf[C] > x.equals(y) > res: Boolean = true > {noformat} > Scala Shell (equals uses match{}): > {noformat} > class C(val s:String) extends Serializable { > override def equals(o: Any) = o match { > case that: C => that.s == s > case _ => false > } > } > val x = new C("a") > val bos = new java.io.ByteArrayOutputStream() > val out = new java.io.ObjectOutputStream(bos) > out.writeObject(x); > val b = bos.toByteArray(); > out.close > bos.close > val y = new java.io.ObjectInputStream(new > java.io.ByteArrayInputStream(b)).readObject().asInstanceOf[C] > x.equals(y) > res: Boolean = true > {noformat} > 2. 
lookup() > ========= > {noformat} > class C(val s:String) extends Serializable { > override def equals(o: Any) = if (o.isInstanceOf[C]) o.asInstanceOf[C].s == > s else false > override def hashCode = s.hashCode > override def toString = s > } > val r = sc.parallelize(Array((new C("a"),11),(new C("a"),12))) > r.lookup(new C("a")) > <console>:17: error: type mismatch; > found : C > required: C > r.lookup(new C("a")) > ^ > {noformat} > See > http://mail-archives.apache.org/mod_mbox/spark-dev/201405.mbox/%3C1400019424.80629.YahooMailNeo%40web160801.mail.bf1.yahoo.com%3E -- This message was sent by Atlassian JIRA (v6.2#6252)