This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 28cf3db77932 [SPARK-48259][CONNECT][TESTS] Add 3 missing methods in dsl
28cf3db77932 is described below

commit 28cf3db779322a487d26fa17282889e217f2d6b5
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Tue May 14 10:16:21 2024 +0800

    [SPARK-48259][CONNECT][TESTS] Add 3 missing methods in dsl
    
    ### What changes were proposed in this pull request?
    Add 3 missing methods (`sampleBy`, `filter` and `crossJoin`) to the Spark Connect `dsl` package object.
    
    ### Why are the changes needed?
    Those methods can then be used in tests.
    
    ### Does this PR introduce _any_ user-facing change?
    no, test only
    
    ### How was this patch tested?
    ci
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #46559 from zhengruifeng/missing_3_func.
    
    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
---
 .../org/apache/spark/sql/connect/dsl/package.scala | 27 ++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala
index 6aadb6c34b77..da9a0865b8ca 100644
--- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala
+++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala
@@ -513,6 +513,25 @@ package object dsl {
         freqItems(cols.toArray, support)
 
       def freqItems(cols: Seq[String]): Relation = freqItems(cols, 0.01)
+
+      def sampleBy(col: String, fractions: Map[Any, Double], seed: Long): Relation = {
+        Relation
+          .newBuilder()
+          .setSampleBy(
+            StatSampleBy
+              .newBuilder()
+              .setInput(logicalPlan)
+              .addAllFractions(fractions.toSeq.map { case (k, v) =>
+                StatSampleBy.Fraction
+                  .newBuilder()
+                  .setStratum(toLiteralProto(k))
+                  .setFraction(v)
+                  .build()
+              }.asJava)
+              .setSeed(seed)
+              .build())
+          .build()
+      }
     }
 
     def select(exprs: Expression*): Relation = {
@@ -587,6 +606,10 @@ package object dsl {
           .build()
       }
 
+      def filter(condition: Expression): Relation = {
+        where(condition)
+      }
+
       def deduplicate(colNames: Seq[String]): Relation =
         Relation
           .newBuilder()
@@ -641,6 +664,10 @@ package object dsl {
         join(otherPlan, joinType, usingColumns, None)
       }
 
+      def crossJoin(otherPlan: Relation): Relation = {
+        join(otherPlan, JoinType.JOIN_TYPE_CROSS, Seq(), None)
+      }
+
       private def join(
           otherPlan: Relation,
           joinType: JoinType = JoinType.JOIN_TYPE_INNER,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to