[jira] [Updated] (SPARK-12110) spark-1.5.1-bin-hadoop2.6; pyspark.ml.feature Exception: ("You must build Spark with Hive
[ https://issues.apache.org/jira/browse/SPARK-12110?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Andrew Davidson updated SPARK-12110: Attachment: launchCluster.sh.out launchCluster.sh launchCluster.sh is a wrapper around the spark-ec2 script. launchCluster.sh.out is the output from when I ran this script on Nov 5th 2015. > spark-1.5.1-bin-hadoop2.6; pyspark.ml.feature Exception: ("You must build > Spark with Hive > > > Key: SPARK-12110 > URL: https://issues.apache.org/jira/browse/SPARK-12110 > Project: Spark > Issue Type: Bug > Components: EC2 >Affects Versions: 1.5.1 > Environment: cluster created using > spark-1.5.1-bin-hadoop2.6/ec2/spark-ec2 >Reporter: Andrew Davidson > Attachments: launchCluster.sh, launchCluster.sh.out > > > I am using spark-1.5.1-bin-hadoop2.6. I used > spark-1.5.1-bin-hadoop2.6/ec2/spark-ec2 to create a cluster and configured > spark-env to use python3. I can not run the tokenizer sample code. Is there a > work around? > Kind regards > Andy > {code} > /root/spark/python/pyspark/sql/context.py in _ssql_ctx(self) > 658 raise Exception("You must build Spark with Hive. " > 659 "Export 'SPARK_HIVE=true' and run " > --> 660 "build/sbt assembly", e) > 661 > 662 def _get_hive_ctx(self): > Exception: ("You must build Spark with Hive. 
Export 'SPARK_HIVE=true' and run > build/sbt assembly", Py4JJavaError('An error occurred while calling > None.org.apache.spark.sql.hive.HiveContext.\n', JavaObject id=o38)) > http://spark.apache.org/docs/latest/ml-features.html#tokenizer > from pyspark.ml.feature import Tokenizer, RegexTokenizer > sentenceDataFrame = sqlContext.createDataFrame([ > (0, "Hi I heard about Spark"), > (1, "I wish Java could use case classes"), > (2, "Logistic,regression,models,are,neat") > ], ["label", "sentence"]) > tokenizer = Tokenizer(inputCol="sentence", outputCol="words") > wordsDataFrame = tokenizer.transform(sentenceDataFrame) > for words_label in wordsDataFrame.select("words", "label").take(3): > print(words_label) > --- > Py4JJavaError Traceback (most recent call last) > /root/spark/python/pyspark/sql/context.py in _ssql_ctx(self) > 654 if not hasattr(self, '_scala_HiveContext'): > --> 655 self._scala_HiveContext = self._get_hive_ctx() > 656 return self._scala_HiveContext > /root/spark/python/pyspark/sql/context.py in _get_hive_ctx(self) > 662 def _get_hive_ctx(self): > --> 663 return self._jvm.HiveContext(self._jsc.sc()) > 664 > /root/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py in > __call__(self, *args) > 700 return_value = get_return_value(answer, self._gateway_client, > None, > --> 701 self._fqn) > 702 > /root/spark/python/pyspark/sql/utils.py in deco(*a, **kw) > 35 try: > ---> 36 return f(*a, **kw) > 37 except py4j.protocol.Py4JJavaError as e: > /root/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py in > get_return_value(answer, gateway_client, target_id, name) > 299 'An error occurred while calling {0}{1}{2}.\n'. > --> 300 format(target_id, '.', name), value) > 301 else: > Py4JJavaError: An error occurred while calling > None.org.apache.spark.sql.hive.HiveContext. 
> : java.lang.RuntimeException: java.io.IOException: Filesystem closed > at > org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:522) > at > org.apache.spark.sql.hive.client.ClientWrapper.(ClientWrapper.scala:171) > at > org.apache.spark.sql.hive.HiveContext.executionHive$lzycompute(HiveContext.scala:162) > at > org.apache.spark.sql.hive.HiveContext.executionHive(HiveContext.scala:160) > at org.apache.spark.sql.hive.HiveContext.(HiveContext.scala:167) > at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) > at > sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) > at > sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) > at java.lang.reflect.Constructor.newInstance(Constructor.java:422) > at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:234) > at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379) > at py4j.Gateway.invoke(Gateway.java:214) > at > py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:79) > at
[jira] [Updated] (SPARK-12110) spark-1.5.1-bin-hadoop2.6; pyspark.ml.feature Exception: ("You must build Spark with Hive
[ https://issues.apache.org/jira/browse/SPARK-12110?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Andrew Davidson updated SPARK-12110: Attachment: launchingSparkCluster.md > spark-1.5.1-bin-hadoop2.6; pyspark.ml.feature Exception: ("You must build > Spark with Hive > > > Key: SPARK-12110 > URL: https://issues.apache.org/jira/browse/SPARK-12110 > Project: Spark > Issue Type: Bug > Components: EC2 >Affects Versions: 1.5.1 > Environment: cluster created using > spark-1.5.1-bin-hadoop2.6/ec2/spark-ec2 >Reporter: Andrew Davidson > Attachments: launchCluster.sh, launchCluster.sh.out, > launchingSparkCluster.md > > > I am using spark-1.5.1-bin-hadoop2.6. I used > spark-1.5.1-bin-hadoop2.6/ec2/spark-ec2 to create a cluster and configured > spark-env to use python3. I can not run the tokenizer sample code. Is there a > work around? > Kind regards > Andy > {code} > /root/spark/python/pyspark/sql/context.py in _ssql_ctx(self) > 658 raise Exception("You must build Spark with Hive. " > 659 "Export 'SPARK_HIVE=true' and run " > --> 660 "build/sbt assembly", e) > 661 > 662 def _get_hive_ctx(self): > Exception: ("You must build Spark with Hive. 
Export 'SPARK_HIVE=true' and run > build/sbt assembly", Py4JJavaError('An error occurred while calling > None.org.apache.spark.sql.hive.HiveContext.\n', JavaObject id=o38)) > http://spark.apache.org/docs/latest/ml-features.html#tokenizer > from pyspark.ml.feature import Tokenizer, RegexTokenizer > sentenceDataFrame = sqlContext.createDataFrame([ > (0, "Hi I heard about Spark"), > (1, "I wish Java could use case classes"), > (2, "Logistic,regression,models,are,neat") > ], ["label", "sentence"]) > tokenizer = Tokenizer(inputCol="sentence", outputCol="words") > wordsDataFrame = tokenizer.transform(sentenceDataFrame) > for words_label in wordsDataFrame.select("words", "label").take(3): > print(words_label) > --- > Py4JJavaError Traceback (most recent call last) > /root/spark/python/pyspark/sql/context.py in _ssql_ctx(self) > 654 if not hasattr(self, '_scala_HiveContext'): > --> 655 self._scala_HiveContext = self._get_hive_ctx() > 656 return self._scala_HiveContext > /root/spark/python/pyspark/sql/context.py in _get_hive_ctx(self) > 662 def _get_hive_ctx(self): > --> 663 return self._jvm.HiveContext(self._jsc.sc()) > 664 > /root/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py in > __call__(self, *args) > 700 return_value = get_return_value(answer, self._gateway_client, > None, > --> 701 self._fqn) > 702 > /root/spark/python/pyspark/sql/utils.py in deco(*a, **kw) > 35 try: > ---> 36 return f(*a, **kw) > 37 except py4j.protocol.Py4JJavaError as e: > /root/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py in > get_return_value(answer, gateway_client, target_id, name) > 299 'An error occurred while calling {0}{1}{2}.\n'. > --> 300 format(target_id, '.', name), value) > 301 else: > Py4JJavaError: An error occurred while calling > None.org.apache.spark.sql.hive.HiveContext. 
> : java.lang.RuntimeException: java.io.IOException: Filesystem closed > at > org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:522) > at > org.apache.spark.sql.hive.client.ClientWrapper.(ClientWrapper.scala:171) > at > org.apache.spark.sql.hive.HiveContext.executionHive$lzycompute(HiveContext.scala:162) > at > org.apache.spark.sql.hive.HiveContext.executionHive(HiveContext.scala:160) > at org.apache.spark.sql.hive.HiveContext.(HiveContext.scala:167) > at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) > at > sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) > at > sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) > at java.lang.reflect.Constructor.newInstance(Constructor.java:422) > at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:234) > at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379) > at py4j.Gateway.invoke(Gateway.java:214) > at > py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:79) > at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:68) > at py4j.GatewayConnection.run(GatewayConnection.java:207) > at
[jira] [Updated] (SPARK-12110) spark-1.5.1-bin-hadoop2.6; pyspark.ml.feature Exception: ("You must build Spark with Hive
[ https://issues.apache.org/jira/browse/SPARK-12110?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Patrick Wendell updated SPARK-12110: Description: I am using spark-1.5.1-bin-hadoop2.6. I used spark-1.5.1-bin-hadoop2.6/ec2/spark-ec2 to create a cluster and configured spark-env to use python3. I can not run the tokenizer sample code. Is there a work around? Kind regards Andy {code} /root/spark/python/pyspark/sql/context.py in _ssql_ctx(self) 658 raise Exception("You must build Spark with Hive. " 659 "Export 'SPARK_HIVE=true' and run " --> 660 "build/sbt assembly", e) 661 662 def _get_hive_ctx(self): Exception: ("You must build Spark with Hive. Export 'SPARK_HIVE=true' and run build/sbt assembly", Py4JJavaError('An error occurred while calling None.org.apache.spark.sql.hive.HiveContext.\n', JavaObject id=o38)) http://spark.apache.org/docs/latest/ml-features.html#tokenizer from pyspark.ml.feature import Tokenizer, RegexTokenizer sentenceDataFrame = sqlContext.createDataFrame([ (0, "Hi I heard about Spark"), (1, "I wish Java could use case classes"), (2, "Logistic,regression,models,are,neat") ], ["label", "sentence"]) tokenizer = Tokenizer(inputCol="sentence", outputCol="words") wordsDataFrame = tokenizer.transform(sentenceDataFrame) for words_label in wordsDataFrame.select("words", "label").take(3): print(words_label) --- Py4JJavaError Traceback (most recent call last) /root/spark/python/pyspark/sql/context.py in _ssql_ctx(self) 654 if not hasattr(self, '_scala_HiveContext'): --> 655 self._scala_HiveContext = self._get_hive_ctx() 656 return self._scala_HiveContext /root/spark/python/pyspark/sql/context.py in _get_hive_ctx(self) 662 def _get_hive_ctx(self): --> 663 return self._jvm.HiveContext(self._jsc.sc()) 664 /root/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py in __call__(self, *args) 700 return_value = get_return_value(answer, self._gateway_client, None, --> 701 self._fqn) 702 /root/spark/python/pyspark/sql/utils.py in deco(*a, 
**kw) 35 try: ---> 36 return f(*a, **kw) 37 except py4j.protocol.Py4JJavaError as e: /root/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name) 299 'An error occurred while calling {0}{1}{2}.\n'. --> 300 format(target_id, '.', name), value) 301 else: Py4JJavaError: An error occurred while calling None.org.apache.spark.sql.hive.HiveContext. : java.lang.RuntimeException: java.io.IOException: Filesystem closed at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:522) at org.apache.spark.sql.hive.client.ClientWrapper.(ClientWrapper.scala:171) at org.apache.spark.sql.hive.HiveContext.executionHive$lzycompute(HiveContext.scala:162) at org.apache.spark.sql.hive.HiveContext.executionHive(HiveContext.scala:160) at org.apache.spark.sql.hive.HiveContext.(HiveContext.scala:167) at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) at java.lang.reflect.Constructor.newInstance(Constructor.java:422) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:234) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379) at py4j.Gateway.invoke(Gateway.java:214) at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:79) at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:68) at py4j.GatewayConnection.run(GatewayConnection.java:207) at java.lang.Thread.run(Thread.java:745) Caused by: java.io.IOException: Filesystem closed at org.apache.hadoop.hdfs.DFSClient.checkOpen(DFSClient.java:323) at org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:1057) at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:554) at org.apache.hadoop.hive.ql.session.SessionState.createRootHDFSDir(SessionState.java:599) 
at org.apache.hadoop.hive.ql.session.SessionState.createSessionDirs(SessionState.java:554) at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:508) ... 15 more During handling of the above exception, another exception occurred: Exception Traceback (most recent call last) in
[jira] [Updated] (SPARK-12110) spark-1.5.1-bin-hadoop2.6; pyspark.ml.feature Exception: ("You must build Spark with Hive
[ https://issues.apache.org/jira/browse/SPARK-12110?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Patrick Wendell updated SPARK-12110: Component/s: (was: ML) (was: SQL) (was: PySpark) EC2 > spark-1.5.1-bin-hadoop2.6; pyspark.ml.feature Exception: ("You must build > Spark with Hive > > > Key: SPARK-12110 > URL: https://issues.apache.org/jira/browse/SPARK-12110 > Project: Spark > Issue Type: Bug > Components: EC2 >Affects Versions: 1.5.1 > Environment: cluster created using > spark-1.5.1-bin-hadoop2.6/ec2/spark-ec2 >Reporter: Andrew Davidson > > I am using spark-1.5.1-bin-hadoop2.6. I used > spark-1.5.1-bin-hadoop2.6/ec2/spark-ec2 to create a cluster and configured > spark-env to use python3. I can not run the tokenizer sample code. Is there a > work around? > Kind regards > Andy > /root/spark/python/pyspark/sql/context.py in _ssql_ctx(self) > 658 raise Exception("You must build Spark with Hive. " > 659 "Export 'SPARK_HIVE=true' and run " > --> 660 "build/sbt assembly", e) > 661 > 662 def _get_hive_ctx(self): > Exception: ("You must build Spark with Hive. 
Export 'SPARK_HIVE=true' and run > build/sbt assembly", Py4JJavaError('An error occurred while calling > None.org.apache.spark.sql.hive.HiveContext.\n', JavaObject id=o38)) > http://spark.apache.org/docs/latest/ml-features.html#tokenizer > from pyspark.ml.feature import Tokenizer, RegexTokenizer > sentenceDataFrame = sqlContext.createDataFrame([ > (0, "Hi I heard about Spark"), > (1, "I wish Java could use case classes"), > (2, "Logistic,regression,models,are,neat") > ], ["label", "sentence"]) > tokenizer = Tokenizer(inputCol="sentence", outputCol="words") > wordsDataFrame = tokenizer.transform(sentenceDataFrame) > for words_label in wordsDataFrame.select("words", "label").take(3): > print(words_label) > --- > Py4JJavaError Traceback (most recent call last) > /root/spark/python/pyspark/sql/context.py in _ssql_ctx(self) > 654 if not hasattr(self, '_scala_HiveContext'): > --> 655 self._scala_HiveContext = self._get_hive_ctx() > 656 return self._scala_HiveContext > /root/spark/python/pyspark/sql/context.py in _get_hive_ctx(self) > 662 def _get_hive_ctx(self): > --> 663 return self._jvm.HiveContext(self._jsc.sc()) > 664 > /root/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py in > __call__(self, *args) > 700 return_value = get_return_value(answer, self._gateway_client, > None, > --> 701 self._fqn) > 702 > /root/spark/python/pyspark/sql/utils.py in deco(*a, **kw) > 35 try: > ---> 36 return f(*a, **kw) > 37 except py4j.protocol.Py4JJavaError as e: > /root/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py in > get_return_value(answer, gateway_client, target_id, name) > 299 'An error occurred while calling {0}{1}{2}.\n'. > --> 300 format(target_id, '.', name), value) > 301 else: > Py4JJavaError: An error occurred while calling > None.org.apache.spark.sql.hive.HiveContext. 
> : java.lang.RuntimeException: java.io.IOException: Filesystem closed > at > org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:522) > at > org.apache.spark.sql.hive.client.ClientWrapper.(ClientWrapper.scala:171) > at > org.apache.spark.sql.hive.HiveContext.executionHive$lzycompute(HiveContext.scala:162) > at > org.apache.spark.sql.hive.HiveContext.executionHive(HiveContext.scala:160) > at org.apache.spark.sql.hive.HiveContext.(HiveContext.scala:167) > at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) > at > sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) > at > sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) > at java.lang.reflect.Constructor.newInstance(Constructor.java:422) > at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:234) > at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379) > at py4j.Gateway.invoke(Gateway.java:214) > at > py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:79) > at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:68) > at py4j.GatewayConnection.run(GatewayConnection.java:207) > at java.lang.Thread.run(Thread.java:745) > Caused by: