This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 2b27b84558e [SPARK-40947][SPARK-40966][PS][INFRA][TEST] Upgrade pandas to 1.5.1 2b27b84558e is described below commit 2b27b84558e060057aed7cf3bc98a853cedd0604 Author: itholic <haejoon....@databricks.com> AuthorDate: Mon Oct 31 12:57:12 2022 -0700 [SPARK-40947][SPARK-40966][PS][INFRA][TEST] Upgrade pandas to 1.5.1 ### What changes were proposed in this pull request? This PR proposes upgrading pandas to 1.5.1 for the pandas API on Spark. A new version of pandas (1.5.1) was released last week (Oct 19, 2022). See [What's new in 1.5.1](https://pandas.pydata.org/pandas-docs/dev/whatsnew/v1.5.1.html) for more details. ### Why are the changes needed? We should follow the behavior of the latest pandas and support it. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The existing tests should all pass. Closes #38420 from itholic/SPARK-40947. Authored-by: itholic <haejoon....@databricks.com> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- dev/infra/Dockerfile | 4 ++-- python/pyspark/pandas/supported_api_gen.py | 2 +- python/pyspark/pandas/tests/test_dataframe_spark_io.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile index 24bad4db408..96b20894b87 100644 --- a/dev/infra/Dockerfile +++ b/dev/infra/Dockerfile @@ -32,7 +32,7 @@ RUN $APT_INSTALL software-properties-common git libxml2-dev pkg-config curl wget RUN update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9 -RUN python3.9 -m pip install numpy pyarrow 'pandas<=1.5.0' scipy unittest-xml-reporting plotly>=4.8 sklearn 'mlflow>=1.0' coverage matplotlib openpyxl +RUN python3.9 -m pip install numpy pyarrow 'pandas<=1.5.1' scipy unittest-xml-reporting plotly>=4.8 sklearn 'mlflow>=1.0' coverage matplotlib openpyxl RUN add-apt-repository ppa:pypy/ppa RUN apt update @@ -45,7 
+45,7 @@ RUN mkdir -p /usr/local/pypy/pypy3.7 && \ ln -sf /usr/local/pypy/pypy3.7/bin/pypy /usr/local/bin/pypy3 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3 -RUN pypy3 -m pip install numpy 'pandas<=1.5.0' scipy coverage matplotlib +RUN pypy3 -m pip install numpy 'pandas<=1.5.1' scipy coverage matplotlib RUN $APT_INSTALL gnupg ca-certificates pandoc RUN echo 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' >> /etc/apt/sources.list diff --git a/python/pyspark/pandas/supported_api_gen.py b/python/pyspark/pandas/supported_api_gen.py index 2af35923afb..9b25f614535 100644 --- a/python/pyspark/pandas/supported_api_gen.py +++ b/python/pyspark/pandas/supported_api_gen.py @@ -98,7 +98,7 @@ def generate_supported_api(output_rst_file_path: str) -> None: Write supported APIs documentation. """ - pandas_latest_version = "1.5.0" + pandas_latest_version = "1.5.1" if LooseVersion(pd.__version__) != LooseVersion(pandas_latest_version): msg = ( "Warning: Latest version of pandas (%s) is required to generate the documentation; " diff --git a/python/pyspark/pandas/tests/test_dataframe_spark_io.py b/python/pyspark/pandas/tests/test_dataframe_spark_io.py index a01f86ef605..dd83070a16c 100644 --- a/python/pyspark/pandas/tests/test_dataframe_spark_io.py +++ b/python/pyspark/pandas/tests/test_dataframe_spark_io.py @@ -99,7 +99,7 @@ class DataFrameSparkIOTest(PandasOnSparkTestCase, TestUtils): expected3 = expected2.set_index("index", append=True) # There is a bug in `to_parquet` from pandas 1.5.0 when writing MultiIndex. # See https://github.com/pandas-dev/pandas/issues/48848 for the reported issue. 
- if LooseVersion(pd.__version__) == LooseVersion("1.5.0"): + if LooseVersion(pd.__version__) > LooseVersion("1.5.0"): expected_psdf = ps.read_parquet(path2, pandas_metadata=True).set_index( "index", append=True ) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org