cloud-fan commented on a change in pull request #26256: [SPARK-29605][SQL] 
Optimize string to interval casting
URL: https://github.com/apache/spark/pull/26256#discussion_r343097985
 
 

 ##########
 File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
 ##########
 @@ -388,4 +388,192 @@ object IntervalUtils {
   def divide(interval: CalendarInterval, num: Double): CalendarInterval = {
     fromDoubles(interval.months / num, interval.days / num, 
interval.microseconds / num)
   }
+
+  private object ParseState extends Enumeration {
+    val PREFIX,
+        BEGIN_VALUE,
+        PARSE_SIGN,
+        PARSE_UNIT_VALUE,
+        FRACTIONAL_PART,
+        BEGIN_UNIT_NAME,
+        UNIT_NAME_SUFFIX,
+        END_UNIT_NAME = Value
+  }
+  private final val intervalStr = UTF8String.fromString("interval ")
+  private final val yearStr = UTF8String.fromString("year")
+  private final val monthStr = UTF8String.fromString("month")
+  private final val weekStr = UTF8String.fromString("week")
+  private final val dayStr = UTF8String.fromString("day")
+  private final val hourStr = UTF8String.fromString("hour")
+  private final val minuteStr = UTF8String.fromString("minute")
+  private final val secondStr = UTF8String.fromString("second")
+  private final val millisStr = UTF8String.fromString("millisecond")
+  private final val microsStr = UTF8String.fromString("microsecond")
+
+  def stringToInterval(input: UTF8String): CalendarInterval = {
+    import ParseState._
+
+    if (input == null) {
+      return null
+    }
+    // scalastyle:off caselocale .toLowerCase
+    val s = input.trim.toLowerCase
+    // scalastyle:on
+    val bytes = s.getBytes
+    if (bytes.length == 0) {
+      return null
+    }
+    var state = PREFIX
+    var i = 0
+    var currentValue: Long = 0
+    var isNegative: Boolean = false
+    var months: Int = 0
+    var days: Int = 0
+    var microseconds: Long = 0
+    var fractionScale: Int = 0
+    var fraction: Int = 0
+
+    while (i < bytes.length) {
+      val b = bytes(i)
+      state match {
+        case PREFIX =>
+          if (s.startsWith(intervalStr)) {
+            if (s.numBytes() == intervalStr.numBytes()) {
+              return null
+            } else {
+              i += intervalStr.numBytes()
+            }
+          }
+          state = BEGIN_VALUE
+        case BEGIN_VALUE =>
+          b match {
+            case ' ' => i += 1
+            case _ => state = PARSE_SIGN
+          }
+        case PARSE_SIGN =>
+          b match {
+            case '-' =>
+              isNegative = true
+              i += 1
+            case '+' =>
+              isNegative = false
+              i += 1
+            case _ if '0' <= b && b <= '9' =>
+              isNegative = false
+            case _ => return null
+          }
+          state = PARSE_UNIT_VALUE
+          currentValue = 0
+          fraction = 0
+        case PARSE_UNIT_VALUE =>
+          b match {
+            case _ if '0' <= b && b <= '9' =>
+              try {
+                currentValue = Math.addExact(Math.multiplyExact(10, 
currentValue), (b - '0'))
+              } catch {
+                case _: ArithmeticException => return null
+              }
+            case ' ' =>
+              state = BEGIN_UNIT_NAME
+            case '.' =>
+              fractionScale = 100000
 
 Review comment:
   The ANTLR version (`IntervalUtils.parseNanos`) supports up to 9 digits in the 
fractional part. Shall we follow it?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to