HyukjinKwon commented on a change in pull request #33323: URL: https://github.com/apache/spark/pull/33323#discussion_r669261877
########## File path: sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala ########## @@ -981,6 +1006,58 @@ class Dataset[T] private[sql]( join(right, usingColumns, "inner") } + /** + * Equi-join with another `DataFrame` using the given column. A cross join with a predicate + * is specified as an inner join. If you would explicitly like to perform a cross join use the + * `crossJoin` method. + * + * Different from other join functions, the join column will only appear once in the output, + * i.e. similar to SQL's `JOIN USING` syntax. + * + * @param right Right side of the join operation. + * @param usingColumn Name of the column to join on. This column must exist on both sides. + * @param joinType Type of join to perform. Default `inner`. Must be one of: + * `inner`, `cross`, `outer`, `full`, `fullouter`, `full_outer`, `left`, + * `leftouter`, `left_outer`, `right`, `rightouter`, `right_outer`, + * `semi`, `leftsemi`, `left_semi`, `anti`, `leftanti`, `left_anti`. + * + * @note If you perform a self-join using this function without aliasing the input + * `DataFrame`s, you will NOT be able to reference any columns after the join, since + * there is no way to disambiguate which side of the join you would like to reference. + * + * @group untypedrel + * @since 3.1.3 + */ + def join(right: Dataset[_], usingColumn: String, joinType: String): DataFrame = { + join(right, Seq(usingColumn), joinType) + } + + /** + * (Java-specific) Equi-join with another `DataFrame` using the given columns. A cross join with + * a predicate is specified as an inner join. If you would explicitly like to perform a cross + * join use the `crossJoin` method. + * + * Different from other join functions, the join columns will only appear once in the output, + * i.e. similar to SQL's `JOIN USING` syntax. + * + * @param right Right side of the join operation. + * @param usingColumns Names of the columns to join on. These columns must exist on both sides. 
+ * @param joinType Type of join to perform. Default `inner`. Must be one of: + * `inner`, `cross`, `outer`, `full`, `fullouter`, `full_outer`, `left`, + * `leftouter`, `left_outer`, `right`, `rightouter`, `right_outer`, + * `semi`, `leftsemi`, `left_semi`, `anti`, `leftanti`, `left_anti`. + * + * @note If you perform a self-join using this function without aliasing the input + * `DataFrame`s, you will NOT be able to reference any columns after the join, since + * there is no way to disambiguate which side of the join you would like to reference. + * + * @group untypedrel + * @since 3.1.3 + */ + def join(right: Dataset[_], usingColumns: Array[String], joinType: String): DataFrame = { + join(right, usingColumns.toSeq, joinType) + } + /** * Equi-join with another `DataFrame` using the given columns. A cross join with a predicate Review comment: Please add "(Scala-specific)" on other methods -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org