This is an automated email from the ASF dual-hosted git repository.
xianjin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git
The following commit(s) were added to refs/heads/master by this push:
new c1f627b6 [#552] docs: add more doc about spark.serializer requirement
(#556)
c1f627b6 is described below
commit c1f627b6a58bc5befa40238c1d19a802609c03c4
Author: advancedxy <[email protected]>
AuthorDate: Wed Feb 8 13:07:58 2023 +0800
[#552] docs: add more doc about spark.serializer requirement (#556)
### What changes were proposed in this pull request?
add more doc and warning
### Why are the changes needed?
Fixes #552
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
No need.
---
README.md | 3 +++
.../src/main/java/org/apache/spark/shuffle/RssShuffleManager.java | 8 ++++++++
docs/client_guide.md | 3 +++
3 files changed, 14 insertions(+)
diff --git a/README.md b/README.md
index 2454e0a4..f419f8d5 100644
--- a/README.md
+++ b/README.md
@@ -196,6 +196,9 @@ rss-xxx.tgz will be generated for deployment
2. Update Spark conf to enable Uniffle, e.g.,
```
+ # Uniffle transmits serialized shuffle data over network, therefore a
serializer that supports relocation of
+ # serialized object should be used.
+ spark.serializer org.apache.spark.serializer.KryoSerializer # this could
also be in the spark-defaults.conf
spark.shuffle.manager org.apache.spark.shuffle.RssShuffleManager
spark.rss.coordinator.quorum <coordinatorIp1>:19999,<coordinatorIp2>:19999
# Note: For Spark2, spark.sql.adaptive.enabled should be false because
Spark2 doesn't support AQE.
diff --git
a/client-spark/spark3/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java
b/client-spark/spark3/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java
index e095637b..a768a449 100644
---
a/client-spark/spark3/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java
+++
b/client-spark/spark3/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java
@@ -20,6 +20,7 @@ package org.apache.spark.shuffle;
import java.util.Collections;
import java.util.List;
import java.util.Map;
+import java.util.Optional;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
@@ -153,6 +154,13 @@ public class RssShuffleManager implements ShuffleManager {
public RssShuffleManager(SparkConf conf, boolean isDriver) {
this.sparkConf = conf;
+ boolean supportsRelocation = Optional.ofNullable(SparkEnv.get())
+ .map(env -> env.serializer().supportsRelocationOfSerializedObjects())
+ .orElse(true);
+ if (!supportsRelocation) {
+ LOG.warn("RSSShuffleManager requires a serializer which supports
relocations of serialized object. Please set "
+ + "spark.serializer to org.apache.spark.serializer.KryoSerializer
instead");
+ }
this.user = sparkConf.get("spark.rss.quota.user", "user");
this.uuid = sparkConf.get("spark.rss.quota.uuid",
Long.toString(System.currentTimeMillis()));
// set & check replica config
diff --git a/docs/client_guide.md b/docs/client_guide.md
index c2884fa0..12c99c91 100644
--- a/docs/client_guide.md
+++ b/docs/client_guide.md
@@ -38,6 +38,9 @@ This document will introduce how to deploy Uniffle client
plugins with Spark and
2. Update Spark conf to enable Uniffle, eg,
```
+ # Uniffle transmits serialized shuffle data over network, therefore a
serializer that supports relocation of
+ # serialized object should be used.
+ spark.serializer org.apache.spark.serializer.KryoSerializer # this could
also be in the spark-defaults.conf
spark.shuffle.manager org.apache.spark.shuffle.RssShuffleManager
spark.rss.coordinator.quorum <coordinatorIp1>:19999,<coordinatorIp2>:19999
# Note: For Spark2, spark.sql.adaptive.enabled should be false because
Spark2 doesn't support AQE.