[jira] [Commented] (SPARK-42266) Local mode should work with IPython
[ https://issues.apache.org/jira/browse/SPARK-42266?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17697727#comment-17697727 ] Apache Spark commented on SPARK-42266: -- User 'HyukjinKwon' has created a pull request for this issue: https://github.com/apache/spark/pull/40327 > Local mode should work with IPython > --- > > Key: SPARK-42266 > URL: https://issues.apache.org/jira/browse/SPARK-42266 > Project: Spark > Issue Type: Sub-task > Components: Connect, PySpark >Affects Versions: 3.4.0 >Reporter: Ruifeng Zheng >Priority: Major > > {code:java} > (spark_dev) ➜ spark git:(master) bin/pyspark --remote "local[*]" > Python 3.9.15 (main, Nov 24 2022, 08:28:41) > Type 'copyright', 'credits' or 'license' for more information > IPython 8.9.0 -- An enhanced Interactive Python. Type '?' for help. > /Users/ruifeng.zheng/Dev/spark/python/pyspark/shell.py:45: UserWarning: > Failed to initialize Spark session. > warnings.warn("Failed to initialize Spark session.") > Traceback (most recent call last): > File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/shell.py", line 40, in > > spark = SparkSession.builder.getOrCreate() > File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/session.py", line > 429, in getOrCreate > from pyspark.sql.connect.session import SparkSession as RemoteSparkSession > File > "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/connect/__init__.py", line > 21, in > from pyspark.sql.connect.dataframe import DataFrame # noqa: F401 > File > "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/connect/dataframe.py", > line 35, in > import pandas > File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/pandas/__init__.py", > line 29, in > from pyspark.pandas.missing.general_functions import > MissingPandasLikeGeneralFunctions > File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/pandas/__init__.py", > line 34, in > require_minimum_pandas_version() > File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/pandas/utils.py", > line 37, in 
require_minimum_pandas_version > if LooseVersion(pandas.__version__) < > LooseVersion(minimum_pandas_version): > AttributeError: partially initialized module 'pandas' has no attribute > '__version__' (most likely due to a circular import) > [TerminalIPythonApp] WARNING | Unknown error in handling PYTHONSTARTUP file > /Users/ruifeng.zheng/Dev/spark//python/pyspark/shell.py: > --- > AttributeError Traceback (most recent call last) > File ~/Dev/spark/python/pyspark/shell.py:40 > 38 try: > 39 # Creates pyspark.sql.connect.SparkSession. > ---> 40 spark = SparkSession.builder.getOrCreate() > 41 except Exception: > File ~/Dev/spark/python/pyspark/sql/session.py:429, in > SparkSession.Builder.getOrCreate(self) > 428 with SparkContext._lock: > --> 429 from pyspark.sql.connect.session import SparkSession as > RemoteSparkSession > 431 if ( > 432 SparkContext._active_spark_context is None > 433 and SparkSession._instantiatedSession is None > 434 ): > File ~/Dev/spark/python/pyspark/sql/connect/__init__.py:21 > 18 """Currently Spark Connect is very experimental and the APIs to > interact with > 19 Spark through this API are can be changed at any time without > warning.""" > ---> 21 from pyspark.sql.connect.dataframe import DataFrame # noqa: F401 > 22 from pyspark.sql.pandas.utils import ( > 23 require_minimum_pandas_version, > 24 require_minimum_pyarrow_version, > 25 require_minimum_grpc_version, > 26 ) > File ~/Dev/spark/python/pyspark/sql/connect/dataframe.py:35 > 34 import random > ---> 35 import pandas > 36 import json > File ~/Dev/spark/python/pyspark/pandas/__init__.py:29 > 27 from typing import Any > ---> 29 from pyspark.pandas.missing.general_functions import > MissingPandasLikeGeneralFunctions > 30 from pyspark.pandas.missing.scalars import MissingPandasLikeScalars > File ~/Dev/spark/python/pyspark/pandas/__init__.py:34 > 33 try: > ---> 34 require_minimum_pandas_version() > 35 require_minimum_pyarrow_version() > File ~/Dev/spark/python/pyspark/sql/pandas/utils.py:37, in 
> require_minimum_pandas_version() > 34 raise ImportError( > 35 "Pandas >= %s must be installed; however, " "it was not > found." % minimum_pandas_version > 36 ) from raised_error > ---> 37 if LooseVersion(pandas.__version__) < > LooseVersion(minimum_pandas_version): > 38 raise ImportError( > 39 "Pandas >= %s must be installed; however, " > 40 "your version was %s." % (minimum_pandas_version, > pandas.__version__) > 41 ) >
[jira] [Commented] (SPARK-42266) Local mode should work with IPython
[ https://issues.apache.org/jira/browse/SPARK-42266?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17685207#comment-17685207 ] Hyukjin Kwon commented on SPARK-42266: -- Let me take a look > Local mode should work with IPython > --- > > Key: SPARK-42266 > URL: https://issues.apache.org/jira/browse/SPARK-42266 > Project: Spark > Issue Type: Sub-task > Components: Connect, PySpark >Affects Versions: 3.4.0 >Reporter: Ruifeng Zheng >Priority: Major > > {code:java} > (spark_dev) ➜ spark git:(master) bin/pyspark --remote "local[*]" > Python 3.9.15 (main, Nov 24 2022, 08:28:41) > Type 'copyright', 'credits' or 'license' for more information > IPython 8.9.0 -- An enhanced Interactive Python. Type '?' for help. > /Users/ruifeng.zheng/Dev/spark/python/pyspark/shell.py:45: UserWarning: > Failed to initialize Spark session. > warnings.warn("Failed to initialize Spark session.") > Traceback (most recent call last): > File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/shell.py", line 40, in > > spark = SparkSession.builder.getOrCreate() > File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/session.py", line > 429, in getOrCreate > from pyspark.sql.connect.session import SparkSession as RemoteSparkSession > File > "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/connect/__init__.py", line > 21, in > from pyspark.sql.connect.dataframe import DataFrame # noqa: F401 > File > "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/connect/dataframe.py", > line 35, in > import pandas > File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/pandas/__init__.py", > line 29, in > from pyspark.pandas.missing.general_functions import > MissingPandasLikeGeneralFunctions > File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/pandas/__init__.py", > line 34, in > require_minimum_pandas_version() > File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/pandas/utils.py", > line 37, in require_minimum_pandas_version > if LooseVersion(pandas.__version__) < > 
LooseVersion(minimum_pandas_version): > AttributeError: partially initialized module 'pandas' has no attribute > '__version__' (most likely due to a circular import) > [TerminalIPythonApp] WARNING | Unknown error in handling PYTHONSTARTUP file > /Users/ruifeng.zheng/Dev/spark//python/pyspark/shell.py: > --- > AttributeError Traceback (most recent call last) > File ~/Dev/spark/python/pyspark/shell.py:40 > 38 try: > 39 # Creates pyspark.sql.connect.SparkSession. > ---> 40 spark = SparkSession.builder.getOrCreate() > 41 except Exception: > File ~/Dev/spark/python/pyspark/sql/session.py:429, in > SparkSession.Builder.getOrCreate(self) > 428 with SparkContext._lock: > --> 429 from pyspark.sql.connect.session import SparkSession as > RemoteSparkSession > 431 if ( > 432 SparkContext._active_spark_context is None > 433 and SparkSession._instantiatedSession is None > 434 ): > File ~/Dev/spark/python/pyspark/sql/connect/__init__.py:21 > 18 """Currently Spark Connect is very experimental and the APIs to > interact with > 19 Spark through this API are can be changed at any time without > warning.""" > ---> 21 from pyspark.sql.connect.dataframe import DataFrame # noqa: F401 > 22 from pyspark.sql.pandas.utils import ( > 23 require_minimum_pandas_version, > 24 require_minimum_pyarrow_version, > 25 require_minimum_grpc_version, > 26 ) > File ~/Dev/spark/python/pyspark/sql/connect/dataframe.py:35 > 34 import random > ---> 35 import pandas > 36 import json > File ~/Dev/spark/python/pyspark/pandas/__init__.py:29 > 27 from typing import Any > ---> 29 from pyspark.pandas.missing.general_functions import > MissingPandasLikeGeneralFunctions > 30 from pyspark.pandas.missing.scalars import MissingPandasLikeScalars > File ~/Dev/spark/python/pyspark/pandas/__init__.py:34 > 33 try: > ---> 34 require_minimum_pandas_version() > 35 require_minimum_pyarrow_version() > File ~/Dev/spark/python/pyspark/sql/pandas/utils.py:37, in > require_minimum_pandas_version() > 34 raise ImportError( > 35 "Pandas 
>= %s must be installed; however, " "it was not > found." % minimum_pandas_version > 36 ) from raised_error > ---> 37 if LooseVersion(pandas.__version__) < > LooseVersion(minimum_pandas_version): > 38 raise ImportError( > 39 "Pandas >= %s must be installed; however, " > 40 "your version was %s." % (minimum_pandas_version, > pandas.__version__) > 41 ) > AttributeError: partially initialized module 'pandas' has no attribute > '__version__'
[jira] [Commented] (SPARK-42266) Local mode should work with IPython
[ https://issues.apache.org/jira/browse/SPARK-42266?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17682829#comment-17682829 ] Ruifeng Zheng commented on SPARK-42266: --- cc [~gurwls223][~XinrongM] > Local mode should work with IPython > --- > > Key: SPARK-42266 > URL: https://issues.apache.org/jira/browse/SPARK-42266 > Project: Spark > Issue Type: Sub-task > Components: Connect, PySpark >Affects Versions: 3.4.0 >Reporter: Ruifeng Zheng >Priority: Major > > {code:java} > (spark_dev) ➜ spark git:(master) bin/pyspark --remote "local[*]" > Python 3.9.15 (main, Nov 24 2022, 08:28:41) > Type 'copyright', 'credits' or 'license' for more information > IPython 8.9.0 -- An enhanced Interactive Python. Type '?' for help. > /Users/ruifeng.zheng/Dev/spark/python/pyspark/shell.py:45: UserWarning: > Failed to initialize Spark session. > warnings.warn("Failed to initialize Spark session.") > Traceback (most recent call last): > File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/shell.py", line 40, in > > spark = SparkSession.builder.getOrCreate() > File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/session.py", line > 429, in getOrCreate > from pyspark.sql.connect.session import SparkSession as RemoteSparkSession > File > "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/connect/__init__.py", line > 21, in > from pyspark.sql.connect.dataframe import DataFrame # noqa: F401 > File > "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/connect/dataframe.py", > line 35, in > import pandas > File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/pandas/__init__.py", > line 29, in > from pyspark.pandas.missing.general_functions import > MissingPandasLikeGeneralFunctions > File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/pandas/__init__.py", > line 34, in > require_minimum_pandas_version() > File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/pandas/utils.py", > line 37, in require_minimum_pandas_version > if LooseVersion(pandas.__version__) < > 
LooseVersion(minimum_pandas_version): > AttributeError: partially initialized module 'pandas' has no attribute > '__version__' (most likely due to a circular import) > [TerminalIPythonApp] WARNING | Unknown error in handling PYTHONSTARTUP file > /Users/ruifeng.zheng/Dev/spark//python/pyspark/shell.py: > --- > AttributeError Traceback (most recent call last) > File ~/Dev/spark/python/pyspark/shell.py:40 > 38 try: > 39 # Creates pyspark.sql.connect.SparkSession. > ---> 40 spark = SparkSession.builder.getOrCreate() > 41 except Exception: > File ~/Dev/spark/python/pyspark/sql/session.py:429, in > SparkSession.Builder.getOrCreate(self) > 428 with SparkContext._lock: > --> 429 from pyspark.sql.connect.session import SparkSession as > RemoteSparkSession > 431 if ( > 432 SparkContext._active_spark_context is None > 433 and SparkSession._instantiatedSession is None > 434 ): > File ~/Dev/spark/python/pyspark/sql/connect/__init__.py:21 > 18 """Currently Spark Connect is very experimental and the APIs to > interact with > 19 Spark through this API are can be changed at any time without > warning.""" > ---> 21 from pyspark.sql.connect.dataframe import DataFrame # noqa: F401 > 22 from pyspark.sql.pandas.utils import ( > 23 require_minimum_pandas_version, > 24 require_minimum_pyarrow_version, > 25 require_minimum_grpc_version, > 26 ) > File ~/Dev/spark/python/pyspark/sql/connect/dataframe.py:35 > 34 import random > ---> 35 import pandas > 36 import json > File ~/Dev/spark/python/pyspark/pandas/__init__.py:29 > 27 from typing import Any > ---> 29 from pyspark.pandas.missing.general_functions import > MissingPandasLikeGeneralFunctions > 30 from pyspark.pandas.missing.scalars import MissingPandasLikeScalars > File ~/Dev/spark/python/pyspark/pandas/__init__.py:34 > 33 try: > ---> 34 require_minimum_pandas_version() > 35 require_minimum_pyarrow_version() > File ~/Dev/spark/python/pyspark/sql/pandas/utils.py:37, in > require_minimum_pandas_version() > 34 raise ImportError( > 35 "Pandas 
>= %s must be installed; however, " "it was not > found." % minimum_pandas_version > 36 ) from raised_error > ---> 37 if LooseVersion(pandas.__version__) < > LooseVersion(minimum_pandas_version): > 38 raise ImportError( > 39 "Pandas >= %s must be installed; however, " > 40 "your version was %s." % (minimum_pandas_version, > pandas.__version__) > 41 ) > AttributeError: partially initialized module 'pandas' has no attribute >