GideonPotok commented on code in PR #47154:
URL: https://github.com/apache/spark/pull/47154#discussion_r1689766242


##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala:
##########
@@ -1789,44 +1798,90 @@ class CollationSQLExpressionsSuite
           s"named_struct('f2', collate('$elt', '${t.collationId}')), 'f3', 1)").mkString(",")
       }.mkString(",")
 
-      val tableName = s"t_${t.collationId}_mode_nested_struct"
+      val tableName = s"t_${t.collationId}_mode_nested_struct1"
       withTable(tableName) {
         sql(s"CREATE TABLE ${tableName}(i STRUCT<f1: STRUCT<f2: STRING COLLATE " +
           t.collationId + ">, f3: INT>) USING parquet")
         sql(s"INSERT INTO ${tableName} VALUES " + valuesToAdd)
         val query = s"SELECT lower(mode(i).f1.f2) FROM ${tableName}"
-        if(t.collationId == "UTF8_LCASE" ||
-          t.collationId == "unicode_ci" ||
-          t.collationId == "unicode") {
-          // Cannot resolve "mode(i)" due to data type mismatch:
-          // Input to function mode was a complex type with strings collated on non-binary
-          // collations, which is not yet supported.. SQLSTATE: 42K09; line 1 pos 13;
-          val params = Seq(("sqlExpr", "\"mode(i)\""),
-            ("msg", "The input to the function 'mode' " +
-              "was a type of binary-unstable type that is not currently supported by mode."),
-            ("hint", "")).toMap
-          checkError(
-            exception = intercept[AnalysisException] {
-              sql(query)
-            },
-            errorClass = "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT",
-            parameters = params,
-            queryContext = Array(
-              ExpectedContext(objectType = "",
-                objectName = "",
-                startIndex = 13,
-                stopIndex = 19,
-                fragment = "mode(i)")
-            )
-          )
-        } else {
-          checkAnswer(sql(query), Row(t.result))
-        }
+        checkAnswer(sql(query), Row(t.result))
       }
     })
   }
 
   test("Support mode for string expression with collated strings in array complex type") {
+    case class ModeTestCase[R](collationId: String, bufferValues: Map[String, Long], result: R)
+    val testCases = Seq(
+      ModeTestCase("utf8_binary", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"),
+      ModeTestCase("UTF8_LCASE", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"),
+      ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"),
+      ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b")
+    )
+    testCases.foreach(t => {
+      val valuesToAdd = t.bufferValues.map { case (elt, numRepeats) =>
+        (0L to numRepeats).map(_ => s"array(named_struct('f2', " +
+          s"collate('$elt', '${t.collationId}'), 'f3', 1))").mkString(",")
+      }.mkString(",")
+
+      val tableName = s"t_${t.collationId}_mode_nested_struct2"
+      withTable(tableName) {
+        sql(s"CREATE TABLE ${tableName}(" +
+          s"i ARRAY< STRUCT<f2: STRING COLLATE ${t.collationId}, f3: INT>>)" +
+          s" USING parquet")
+        sql(s"INSERT INTO ${tableName} VALUES " + valuesToAdd)
+        val query = s"SELECT lower(element_at(mode(i).f2, 1)) FROM ${tableName}"
+        checkAnswer(sql(query), Row(t.result))
+      }
+    })
+  }
+
+  test("Support mode for string expression with collated strings in 3D array type") {

Review Comment:
   @uros-db 
   
   How about I replace it with the following, which calls recursive helper functions from within `getCollationAwareBuffer`?
   
   ```
     /**
      * Returns the entries of `buffer`, merging entries whose keys map to the same
      * grouping key for `childDataType` and summing their counts.
      *
      * When `childDataType` is binary-stable, or is a type with no dedicated case
      * below (e.g. MapType), `buffer` is returned unchanged.
      *
      * @param childDataType data type of the keys stored in `buffer`
      * @param buffer        map from observed value to the number of times it was seen
      * @return (value, count) pairs with equal-under-grouping values merged
      */
     private def getCollationAwareBuffer(
         childDataType: DataType,
         buffer: OpenHashMap[AnyRef, Long]): Iterable[(AnyRef, Long)] = {
       // Groups entries by `groupingFunction(key)` and sums the counts within each group.
       // The key retained for a group is the left-hand entry's key at each reduction step.
       def getBuffer(groupingFunction: AnyRef => _): Iterable[(AnyRef, Long)] = {
         buffer.groupMapReduce(t =>
           groupingFunction(t._1))(x => x)((x, y) => (x._1, x._2 + y._2)).values
       }
       childDataType match {
         // Short-circuit if there is no collation. The guard checks the type passed in,
         // not the enclosing expression's child, so the result depends only on the arguments.
         case _ if UnsafeRowUtils.isBinaryStable(childDataType) => buffer
         case c: StringType => getBuffer(k =>
           CollationFactory.getCollationKey(k.asInstanceOf[UTF8String], c.collationId))
         // NOTE(review): the two recursive helpers below are defined outside this snippet;
         // presumably they derive collation-aware grouping keys for nested values -- confirm.
         case at: ArrayType => getBuffer(k =>
           recursivelyGetBufferForArrayType(at, k.asInstanceOf[ArrayData]))
         case st: StructType =>
           getBuffer(k => recursivelyGetBufferForStructType(
             k.asInstanceOf[InternalRow].toSeq(st).zip(st.fields)))
         // Not supported: MapType
         case _ => buffer
       }
     }
     ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to