This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push: new 35942fc [SPARK-37624][PYTHON][DOCS] Suppress warnings for live pandas-on-Spark quickstart notebooks 35942fc is described below commit 35942fc398052512ba1f3e8e87f32861dbfe579c Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Mon Dec 13 17:55:45 2021 +0900 [SPARK-37624][PYTHON][DOCS] Suppress warnings for live pandas-on-Spark quickstart notebooks This PR proposes to suppress warnings, in live pandas-on-Spark quickstart notebooks, as below: <img width="1109" alt="Screen Shot 2021-12-13 at 2 02 45 PM" src="https://user-images.githubusercontent.com/6477701/145756407-03b9d94e-a082-42a1-8052-d14b4989ae86.png"> <img width="1103" alt="Screen Shot 2021-12-13 at 2 02 25 PM" src="https://user-images.githubusercontent.com/6477701/145756419-b5dfa729-96fa-4646-bb26-40302d33632b.png"> <img width="1100" alt="Screen Shot 2021-12-13 at 2 02 20 PM" src="https://user-images.githubusercontent.com/6477701/145756420-b8e1b105-495b-4b1c-ab3c-4d47793ba80e.png"> <img width="1027" alt="Screen Shot 2021-12-13 at 1 32 05 PM" src="https://user-images.githubusercontent.com/6477701/145756424-bf93a4a1-2587-49fb-9abb-e2f6de032e48.png"> This is a user-facing quickstart that is an interactive shell, and showing a lot of warnings makes it difficult to follow the output. Note that we also set a lower log4j level to interpreters. Users will see clean output in live quickstart notebook: https://mybinder.org/v2/gh/apache/spark/9e614e265f?filepath=python%2Fdocs%2Fsource%2Fgetting_started%2Fquickstart_ps.ipynb I manually tested at https://mybinder.org/v2/gh/HyukjinKwon/spark/cleanup-ps-quickstart?labpath=python%2Fdocs%2Fsource%2Fgetting_started%2Fquickstart_ps.ipynb Closes #34875 from HyukjinKwon/cleanup-ps-quickstart. 
Authored-by: Hyukjin Kwon <gurwls...@apache.org> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> (cherry picked from commit 195da5623c36ce54e85bc6584f7b49107899ffff) Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- binder/postBuild | 9 +++++++++ python/docs/source/getting_started/quickstart_ps.ipynb | 8 ++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/binder/postBuild b/binder/postBuild index 146243b..733eafe 100644 --- a/binder/postBuild +++ b/binder/postBuild @@ -33,3 +33,12 @@ else fi pip install plotly "pyspark[sql,ml,mllib,pandas_on_spark]$SPECIFIER$VERSION" + +# Set 'PYARROW_IGNORE_TIMEZONE' to surpress warnings from PyArrow. +echo "export PYARROW_IGNORE_TIMEZONE=1" >> ~/.profile + +# Surpress warnings from Spark jobs, and UI progress bar. +mkdir -p ~/.ipython/profile_default/startup +echo """from pyspark.sql import SparkSession +SparkSession.builder.config('spark.ui.showConsoleProgress', 'false').getOrCreate().sparkContext.setLogLevel('FATAL') +""" > ~/.ipython/profile_default/startup/00-init.py diff --git a/python/docs/source/getting_started/quickstart_ps.ipynb b/python/docs/source/getting_started/quickstart_ps.ipynb index 74d6724..ff7512a 100644 --- a/python/docs/source/getting_started/quickstart_ps.ipynb +++ b/python/docs/source/getting_started/quickstart_ps.ipynb @@ -1619,7 +1619,7 @@ "metadata": {}, "outputs": [], "source": [ - "prev = spark.conf.get(\"spark.sql.execution.arrow.enabled\") # Keep its default value.\n", + "prev = spark.conf.get(\"spark.sql.execution.arrow.pyspark.enabled\") # Keep its default value.\n", "ps.set_option(\"compute.default_index_type\", \"distributed\") # Use default index prevent overhead.\n", "import warnings\n", "warnings.filterwarnings(\"ignore\") # Ignore warnings coming from Arrow optimizations." 
@@ -1639,7 +1639,7 @@ } ], "source": [ - "spark.conf.set(\"spark.sql.execution.arrow.enabled\", True)\n", + "spark.conf.set(\"spark.sql.execution.arrow.pyspark.enabled\", True)\n", "%timeit ps.range(300000).to_pandas()" ] }, @@ -1657,7 +1657,7 @@ } ], "source": [ - "spark.conf.set(\"spark.sql.execution.arrow.enabled\", False)\n", + "spark.conf.set(\"spark.sql.execution.arrow.pyspark.enabled\", False)\n", "%timeit ps.range(300000).to_pandas()" ] }, @@ -1668,7 +1668,7 @@ "outputs": [], "source": [ "ps.reset_option(\"compute.default_index_type\")\n", - "spark.conf.set(\"spark.sql.execution.arrow.enabled\", prev) # Set its default value back." + "spark.conf.set(\"spark.sql.execution.arrow.pyspark.enabled\", prev) # Set its default value back." ] }, { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org