Another error — does anyone have any idea? This one happens when I try to convert a Spark DataFrame to pandas:
---------------------------------------------------------------------------Py4JError Traceback (most recent call last)/home/ubuntu/spark-2.1.1-bin-hadoop2.7/python/pyspark/sql/dataframe.py in collect(self) 390 with SCCallSiteSync(self._sc) as css:--> 391 port = self._jdf.collectToPython() 392 return list(_load_from_socket(port, BatchedSerializer(PickleSerializer()))) /home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/java_gateway.py in __call__(self, *args) 1132 return_value = get_return_value(-> 1133 answer, self.gateway_client, self.target_id, self.name) 1134 /home/ubuntu/spark-2.1.1-bin-hadoop2.7/python/pyspark/sql/utils.py in deco(*a, **kw) 62 try:---> 63 return f(*a, **kw) 64 except py4j.protocol.Py4JJavaError as e: /home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name) 326 "An error occurred while calling {0}{1}{2}".--> 327 format(target_id, ".", name)) 328 else: Py4JError: An error occurred while calling o69.collectToPython During handling of the above exception, another exception occurred: IndexError Traceback (most recent call last)/home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/java_gateway.py in _get_connection(self) 826 try:--> 827 connection = self.deque.pop() 828 except IndexError: IndexError: pop from an empty deque During handling of the above exception, another exception occurred: ConnectionRefusedError Traceback (most recent call last)/home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/java_gateway.py in start(self) 962 try:--> 963 self.socket.connect((self.address, self.port)) 964 self.is_connected = True ConnectionRefusedError: [Errno 111] Connection refused During handling of the above exception, another exception occurred: Py4JNetworkError Traceback (most recent call last)<ipython-input-6-f8a4afbd6e4f> in <module>() 7 'lead_time', 'dep_weekday', 'dep_weeknum', 8 'days_to_last_holiday', 'days_to_next_holiday',----> 9 'duration_minutes', 
'stop_minutes').toPandas() 10 flight_pd.head() /home/ubuntu/spark-2.1.1-bin-hadoop2.7/python/pyspark/sql/dataframe.py in toPandas(self) 1583 """ 1584 import pandas as pd-> 1585 return pd.DataFrame.from_records(self.collect(), columns=self.columns) 1586 1587 ########################################################################################## /home/ubuntu/spark-2.1.1-bin-hadoop2.7/python/pyspark/sql/dataframe.py in collect(self) 389 """ 390 with SCCallSiteSync(self._sc) as css:--> 391 port = self._jdf.collectToPython() 392 return list(_load_from_socket(port, BatchedSerializer(PickleSerializer()))) 393 /home/ubuntu/spark-2.1.1-bin-hadoop2.7/python/pyspark/traceback_utils.py in __exit__(self, type, value, tb) 76 SCCallSiteSync._spark_stack_depth -= 1 77 if SCCallSiteSync._spark_stack_depth == 0:---> 78 self._context._jsc.setCallSite(None) /home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/java_gateway.py in __call__(self, *args) 1129 proto.END_COMMAND_PART 1130 -> 1131 answer = self.gateway_client.send_command(command) 1132 return_value = get_return_value( 1133 answer, self.gateway_client, self.target_id, self.name) /home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/java_gateway.py in send_command(self, command, retry, binary) 879 if `binary` is `True`. 
880 """--> 881 connection = self._get_connection() 882 try: 883 response = connection.send_command(command) /home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/java_gateway.py in _get_connection(self) 827 connection = self.deque.pop() 828 except IndexError:--> 829 connection = self._create_connection() 830 return connection 831 /home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/java_gateway.py in _create_connection(self) 833 connection = GatewayConnection( 834 self.gateway_parameters, self.gateway_property)--> 835 connection.start() 836 return connection 837 /home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/java_gateway.py in start(self) 968 "server ({0}:{1})".format(self.address, self.port) 969 logger.exception(msg)--> 970 raise Py4JNetworkError(msg, e) 971 972 def close(self, reset=False): Py4JNetworkError: An error occurred while trying to connect to the Java server (127.0.0.1:34166) On Sat, May 13, 2017 at 10:21 PM, Zeming Yu <zemin...@gmail.com> wrote: > My code runs error free on my local pc. Just tried running the same code > on a ubuntu machine on ec2, and got the error below. Any idea where to > start in terms of debugging? 
> > ---------------------------------------------------------------------------Py4JError > Traceback (most recent call > last)<ipython-input-21-107cdbd63b26> in <module>()----> 1 output.show(2) > /home/ubuntu/spark-2.1.1-bin-hadoop2.7/python/pyspark/sql/dataframe.py in > show(self, n, truncate) 316 """ 317 if > isinstance(truncate, bool) and truncate:--> 318 > print(self._jdf.showString(n, 20)) 319 else: 320 > print(self._jdf.showString(n, int(truncate))) > /home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/java_gateway.py in > __call__(self, *args) 1131 answer = > self.gateway_client.send_command(command) 1132 return_value = > get_return_value(-> 1133 answer, self.gateway_client, > self.target_id, self.name) 1134 1135 for temp_arg in temp_args: > /home/ubuntu/spark-2.1.1-bin-hadoop2.7/python/pyspark/sql/utils.py in > deco(*a, **kw) 61 def deco(*a, **kw): 62 try:---> 63 > return f(*a, **kw) 64 except py4j.protocol.Py4JJavaError > as e: 65 s = e.java_exception.toString() > /home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/protocol.py in > get_return_value(answer, gateway_client, target_id, name) 325 > raise Py4JError( 326 "An error occurred while calling > {0}{1}{2}".--> 327 format(target_id, ".", name)) 328 > else: 329 type = answer[1] > Py4JError: An error occurred while calling o648.showString > >