This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/kylin.git


The following commit(s) were added to refs/heads/main by this push:
     new 6416ea0  KYLIN-5011 Detect and scatter skewed data in dict encoding step
6416ea0 is described below

commit 6416ea0f426ccfa60bed707693bccc0e7676eac2
Author: zhengshengjun <zhengsheng...@apache.org>
AuthorDate: Tue Aug 10 17:11:33 2021 +0800

    KYLIN-5011 Detect and scatter skewed data in dict encoding step
---
 .../org/apache/kylin/engine/spark/builder/CubeTableEncoder.scala  | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeTableEncoder.scala b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeTableEncoder.scala
index 8a5fe20..c9cea1f 100644
--- a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeTableEncoder.scala
+++ b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeTableEncoder.scala
@@ -111,12 +111,12 @@ object CubeTableEncoder extends Logging {
             partitionedDs = partitionedDs.select(columns ++ Seq(scatterColumn): _*)
               .repartition(enlargedBucketSize, col("scatter_skew_data_" + ref.columnName))
               .select(columns ++ Seq(encodeCol): _*)
-            return partitionedDs;
           }
+        } else {
+          partitionedDs = partitionedDs
+            .repartition(enlargedBucketSize, col(encodeColRef).cast(StringType))
+            .select(columns ++ Seq(encodeCol): _*)
         }
-        partitionedDs = partitionedDs
-          .repartition(enlargedBucketSize, col(encodeColRef).cast(StringType))
-          .select(columns ++ Seq(encodeCol): _*)
       }
     )
 

Reply via email to