[v8-dev] [v8] r4333 committed - StringToInt rewritten. This version doesn't allocate memory for long d...

codesite-noreply Wed, 31 Mar 2010 10:20:27 -0700

Revision: 4333
Author: [email protected]
Date: Wed Mar 31 10:19:05 2010

Log: StringToInt rewritten. This version doesn't allocate memory for longdecimals and uses percise rounding if radix 10 or a power of 2 (in othercases rounding error still may occur). Handling special values moved fromRuntime_StringParseInt into StringToInt in order to make it consistent withStringToDouble.



Committed: http://code.google.com/p/v8/source/detail?r=4329
Review URL: http://codereview.chromium.org/1529004
http://code.google.com/p/v8/source/detail?r=4333

Modified:
 /branches/bleeding_edge/src/conversions.cc
 /branches/bleeding_edge/src/conversions.h
 /branches/bleeding_edge/src/runtime.cc
 /branches/bleeding_edge/test/mjsunit/parse-int-float.js
 /branches/bleeding_edge/test/mjsunit/str-to-num.js

=======================================
--- /branches/bleeding_edge/src/conversions.cc  Wed Mar 31 04:13:42 2010
+++ /branches/bleeding_edge/src/conversions.cc  Wed Mar 31 10:19:05 2010
@@ -47,51 +47,6 @@
     return c - 'A' + 10;
   return -1;
 }
-
-
-// Provide a common interface to getting a character at a certain
-// index from a char* or a String object.
-static inline int GetChar(const char* str, int index) {
-  ASSERT(index >= 0 && index < StrLength(str));
-  return str[index];
-}
-
-
-static inline int GetChar(String* str, int index) {
-  return str->Get(index);
-}
-
-
-static inline int GetLength(const char* str) {
-  return StrLength(str);
-}
-
-
-static inline int GetLength(String* str) {
-  return str->length();
-}
-
-
-static inline const char* GetCString(const char* str, int index) {
-  return str + index;
-}
-
-
-static inline const char* GetCString(String* str, int index) {
-  int length = str->length();
-  char* result = NewArray<char>(length + 1);
-  for (int i = index; i < length; i++) {
-    uc16 c = str->Get(i);
-    if (c <= 127) {
-      result[i - index] = static_cast<char>(c);
-    } else {
-      result[i - index] = 127;  // Force number parsing to fail.
-    }
-  }
-  result[length - index] = '\0';
-  return result;
-}
-

 namespace {

@@ -132,15 +87,6 @@
   }
 }
 }
-
-
-static inline void ReleaseCString(const char* original, const char* str) {
-}
-
-
-static inline void ReleaseCString(String* original, const char* str) {
-  DeleteArray(const_cast<char *>(str));
-}


 template <class Iterator, class EndMark>
@@ -168,97 +114,6 @@
 // we don't need to preserve all the digits.
 const int kMaxSignificantDigits = 772;

-// Parse an int from a string starting a given index and in a given
-// radix.  The string can be either a char* or a String*.
-template <class S>
-static int InternalStringToInt(S* s, int i, int radix, double* value) {
-  int len = GetLength(s);
-
-  // Setup limits for computing the value.
-  ASSERT(2 <= radix && radix <= 36);
-  int lim_0 = '0' + (radix < 10 ? radix : 10);
-  int lim_a = 'a' + (radix - 10);
-  int lim_A = 'A' + (radix - 10);
-
-  // NOTE: The code for computing the value may seem a bit complex at
-  // first glance. It is structured to use 32-bit multiply-and-add
-  // loops as long as possible to avoid loosing precision.
-
-  double v = 0.0;
-  int j;
-  for (j = i; j < len;) {
-    // Parse the longest part of the string starting at index j
-    // possible while keeping the multiplier, and thus the part
-    // itself, within 32 bits.
-    uint32_t part = 0, multiplier = 1;
-    int k;
-    for (k = j; k < len; k++) {
-      int c = GetChar(s, k);
-      if (c >= '0' && c < lim_0) {
-        c = c - '0';
-      } else if (c >= 'a' && c < lim_a) {
-        c = c - 'a' + 10;
-      } else if (c >= 'A' && c < lim_A) {
-        c = c - 'A' + 10;
-      } else {
-        break;
-      }
-
-      // Update the value of the part as long as the multiplier fits
-      // in 32 bits. When we can't guarantee that the next iteration
-      // will not overflow the multiplier, we stop parsing the part
-      // by leaving the loop.
-      static const uint32_t kMaximumMultiplier = 0xffffffffU / 36;
-      uint32_t m = multiplier * radix;
-      if (m > kMaximumMultiplier) break;
-      part = part * radix + c;
-      multiplier = m;
-      ASSERT(multiplier > part);
-    }
-
-    // Compute the number of part digits. If no digits were parsed;
-    // we're done parsing the entire string.
-    int digits = k - j;
-    if (digits == 0) break;
-
-    // Update the value and skip the part in the string.
-    ASSERT(multiplier ==
-           pow(static_cast<double>(radix), static_cast<double>(digits)));
-    v = v * multiplier + part;
-    j = k;
-  }
-
-  // If the resulting value is larger than 2^53 the value does not fit
-  // in the mantissa of the double and there is a loss of precision.
-  // When the value is larger than 2^53 the rounding depends on the
-  // code generation.  If the code generator spills the double value
-  // it uses 64 bits and if it does not it uses 80 bits.
-  //
-  // If there is a potential for overflow we resort to strtod for
-  // radix 10 numbers to get higher precision.  For numbers in another
-  // radix we live with the loss of precision.
-  static const double kPreciseConversionLimit = 9007199254740992.0;
-  if (radix == 10 && v > kPreciseConversionLimit) {
-    const char* cstr = GetCString(s, i);
-    const char* end;
-    v = gay_strtod(cstr, &end);
-    ReleaseCString(s, cstr);
-  }
-
-  *value = v;
-  return j;
-}
-
-
-int StringToInt(String* str, int index, int radix, double* value) {
-  return InternalStringToInt(str, index, radix, value);
-}
-
-
-int StringToInt(const char* str, int index, int radix, double* value) {
-  return InternalStringToInt(const_cast<char*>(str), index, radix, value);
-}
-

 static const double JUNK_STRING_VALUE = OS::nan_value();

@@ -279,20 +134,25 @@
       || (radix > 10 && x >= 'a' && x < 'a' + radix - 10)
       || (radix > 10 && x >= 'A' && x < 'A' + radix - 10);
 }
+
+
+static double SignedZero(bool sign) {
+  return sign ? -0.0 : 0.0;
+}


 // Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end.
 template <int radix_log_2, class Iterator, class EndMark>
-    static double InternalStringToIntDouble(Iterator current,
-                                            EndMark end,
-                                            bool sign,
-                                            bool allow_trailing_junk) {
+static double InternalStringToIntDouble(Iterator current,
+                                        EndMark end,
+                                        bool sign,
+                                        bool allow_trailing_junk) {
   ASSERT(current != end);

   // Skip leading 0s.
   while (*current == '0') {
     ++current;
-    if (current == end) return sign ? -0.0 : 0.0;
+    if (current == end) return SignedZero(sign);
   }

   int64_t number = 0;
@@ -380,6 +240,183 @@
   // and sign. Assuming it's a rare case more simple code is used.
   return static_cast<double>(sign ? -number : number) * pow(2.0, exponent);
 }
+
+
+template <class Iterator, class EndMark>

+static double InternalStringToInt(Iterator current, EndMark end, intradix) {

+  const bool allow_trailing_junk = true;
+  const double empty_string_val = JUNK_STRING_VALUE;
+
+  if (!AdvanceToNonspace(&current, end)) return empty_string_val;
+
+  bool sign = false;
+  bool leading_zero = false;
+
+  if (*current == '+') {
+    // Ignore leading sign; skip following spaces.
+    ++current;
+    if (!AdvanceToNonspace(&current, end)) return JUNK_STRING_VALUE;
+  } else if (*current == '-') {
+    ++current;
+    if (!AdvanceToNonspace(&current, end)) return JUNK_STRING_VALUE;
+    sign = true;
+  }
+
+  if (radix == 0) {
+    // Radix detection.
+    if (*current == '0') {
+      ++current;
+      if (current == end) return SignedZero(sign);
+      if (*current == 'x' || *current == 'X') {
+        radix = 16;
+        ++current;
+        if (current == end) return JUNK_STRING_VALUE;
+      } else {
+        radix = 8;
+        leading_zero = true;
+      }
+    } else {
+      radix = 10;
+    }
+  } else if (radix == 16) {
+    if (*current == '0') {
+      // Allow "0x" prefix.
+      ++current;
+      if (current == end) return SignedZero(sign);
+      if (*current == 'x' || *current == 'X') {
+        ++current;
+        if (current == end) return JUNK_STRING_VALUE;
+      } else {
+        leading_zero = true;
+      }
+    }
+  }
+
+  if (radix < 2 || radix > 36) return JUNK_STRING_VALUE;
+
+  // Skip leading zeros.
+  while (*current == '0') {
+    leading_zero = true;
+    ++current;
+    if (current == end) return SignedZero(sign);
+  }
+
+  if (!leading_zero && !isDigit(*current, radix)) {
+    return JUNK_STRING_VALUE;
+  }
+
+  if (IsPowerOf2(radix)) {
+    switch (radix) {
+      case 2:
+        return InternalStringToIntDouble<1>(
+                   current, end, sign, allow_trailing_junk);
+      case 4:
+        return InternalStringToIntDouble<2>(
+                   current, end, sign, allow_trailing_junk);
+      case 8:
+        return InternalStringToIntDouble<3>(
+                   current, end, sign, allow_trailing_junk);
+
+      case 16:
+        return InternalStringToIntDouble<4>(
+                   current, end, sign, allow_trailing_junk);
+
+      case 32:
+        return InternalStringToIntDouble<5>(
+                   current, end, sign, allow_trailing_junk);
+      default:
+        UNREACHABLE();
+    }
+  }
+
+  if (radix == 10) {
+    // Parsing with strtod.

+ const int kMaxSignificantDigits = 309; // Doubles are less than1.8e308.+ // The buffer may contain up to kMaxSignificantDigits + 1 digits and azero

+    // end.
+    const int kBufferSize = kMaxSignificantDigits + 2;
+    char buffer[kBufferSize];
+    int buffer_pos = 0;
+    while (*current >= '0' && *current <= '9') {
+      if (buffer_pos <= kMaxSignificantDigits) {

+ // If the number has more than kMaxSignificantDigits it will beparsed

+        // as infinity.
+        ASSERT(buffer_pos < kBufferSize);
+        buffer[buffer_pos++] = static_cast<char>(*current);
+      }
+      ++current;
+      if (current == end) break;
+    }
+
+    if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+      return JUNK_STRING_VALUE;
+    }
+
+    ASSERT(buffer_pos < kBufferSize);
+    buffer[buffer_pos++] = '\0';
+    return sign ? -gay_strtod(buffer, NULL) : gay_strtod(buffer, NULL);
+  }
+
+  // TODO(serya): The following legacy code causes accumulating rounding
+  // error for number greater than ~2^56. It should be rewritten using long
+  // arithmetic.
+
+  int lim_0 = '0' + (radix < 10 ? radix : 10);
+  int lim_a = 'a' + (radix - 10);
+  int lim_A = 'A' + (radix - 10);
+
+  // NOTE: The code for computing the value may seem a bit complex at
+  // first glance. It is structured to use 32-bit multiply-and-add
+  // loops as long as possible to avoid loosing precision.
+
+  double v = 0.0;
+  bool done = false;
+  do {
+    // Parse the longest part of the string starting at index j
+    // possible while keeping the multiplier, and thus the part
+    // itself, within 32 bits.
+    unsigned int part = 0, multiplier = 1;
+    while (true) {
+      int d;
+      if (*current >= '0' && *current < lim_0) {
+        d = *current - '0';
+      } else if (*current >= 'a' && *current < lim_a) {
+        d = *current - 'a' + 10;
+      } else if (*current >= 'A' && *current < lim_A) {
+        d = *current - 'A' + 10;
+      } else {
+        done = true;
+        break;
+      }
+
+      // Update the value of the part as long as the multiplier fits
+      // in 32 bits. When we can't guarantee that the next iteration
+      // will not overflow the multiplier, we stop parsing the part
+      // by leaving the loop.
+      const unsigned int kMaximumMultiplier = 0xffffffffU / 36;
+      uint32_t m = multiplier * radix;
+      if (m > kMaximumMultiplier) break;
+      part = part * radix + d;
+      multiplier = m;
+      ASSERT(multiplier > part);
+
+      ++current;
+      if (current == end) {
+        done = true;
+        break;
+      }
+    }
+
+    // Update the value and skip the part in the string.
+    v = v * multiplier + part;
+  } while (!done);
+
+  if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+    return JUNK_STRING_VALUE;
+  }
+
+  return sign ? -v : v;
+}


 // Converts a string to a double value. Assumes the Iterator supports
@@ -417,7 +454,7 @@
   bool nonzero_digit_dropped = false;
   bool fractional_part = false;

-  double signed_zero = 0.0;
+  bool sign = false;

   if (*current == '+') {
     // Ignore leading sign; skip following spaces.
@@ -427,7 +464,7 @@
     buffer[buffer_pos++] = '-';
     ++current;
     if (!AdvanceToNonspace(&current, end)) return JUNK_STRING_VALUE;
-    signed_zero = -0.0;
+    sign = true;
   }

   static const char kInfinitySymbol[] = "Infinity";
@@ -447,14 +484,16 @@
   bool leading_zero = false;
   if (*current == '0') {
     ++current;
-    if (current == end) return signed_zero;
+    if (current == end) return SignedZero(sign);

     leading_zero = true;

-// It could be hexadecimal value.
+    // It could be hexadecimal value.
     if ((flags & ALLOW_HEX) && (*current == 'x' || *current == 'X')) {
       ++current;
-      if (current == end) return JUNK_STRING_VALUE;  // "0x".
+      if (current == end || !isDigit(*current, 16)) {
+        return JUNK_STRING_VALUE;  // "0x".
+      }

       bool sign = (buffer_pos > 0 && buffer[0] == '-');
       return InternalStringToIntDouble<4>(current,
@@ -466,7 +505,7 @@
     // Ignore leading zeros in the integer part.
     while (*current == '0') {
       ++current;
-      if (current == end) return signed_zero;
+      if (current == end) return SignedZero(sign);
     }
   }

@@ -508,7 +547,7 @@
       // leading zeros (if any).
       while (*current == '0') {
         ++current;
-        if (current == end) return signed_zero;
+        if (current == end) return SignedZero(sign);
         exponent--;  // Move this 0 into the exponent.
       }
     }
@@ -635,7 +674,7 @@
     ASSERT(exponent == 0);
     buffer_pos += exp_digits;
   } else if (!fractional_part && significant_digits <= kMaxDigitsInInt) {
-    if (significant_digits == 0) return signed_zero;
+    if (significant_digits == 0) return SignedZero(sign);
     ASSERT(buffer_pos > 0);
     int num = 0;
     int start_pos = (buffer[0] == '-' ? 1 : 0);
@@ -670,6 +709,25 @@
                                   empty_string_val);
   }
 }
+
+
+double StringToInt(String* str, int radix) {
+  StringShape shape(str);
+  if (shape.IsSequentialAscii()) {
+    const char* begin = SeqAsciiString::cast(str)->GetChars();
+    const char* end = begin + str->length();
+    return InternalStringToInt(begin, end, radix);
+  } else if (shape.IsSequentialTwoByte()) {
+    const uc16* begin = SeqTwoByteString::cast(str)->GetChars();
+    const uc16* end = begin + str->length();
+    return InternalStringToInt(begin, end, radix);
+  } else {
+    StringInputBuffer buffer(str);
+    return InternalStringToInt(StringInputBufferIterator(&buffer),
+                               StringInputBufferIterator::EndMarker(),
+                               radix);
+  }
+}

double StringToDouble(const char* str, int flags, double empty_string_val){

=======================================
--- /branches/bleeding_edge/src/conversions.h   Wed Mar 31 04:13:42 2010
+++ /branches/bleeding_edge/src/conversions.h   Wed Mar 31 10:19:05 2010
@@ -100,8 +100,7 @@
 double StringToDouble(String* str, int flags, double empty_string_val = 0);

 // Converts a string into an integer.
-int StringToInt(String* str, int index, int radix, double* value);
-int StringToInt(const char* str, int index, int radix, double* value);
+double StringToInt(String* str, int radix);

 // Converts a double to a string value according to ECMA-262 9.8.1.
 // The buffer should be large enough for any floating point number.
=======================================
--- /branches/bleeding_edge/src/runtime.cc      Wed Mar 31 05:00:57 2010
+++ /branches/bleeding_edge/src/runtime.cc      Wed Mar 31 10:19:05 2010
@@ -4739,49 +4739,9 @@

   s->TryFlatten();

-  int len = s->length();
-  int i;
-
-  // Skip leading white space.
-  for (i = 0; i < len && Scanner::kIsWhiteSpace.get(s->Get(i)); i++) ;
-  if (i == len) return Heap::nan_value();
-
-  // Compute the sign (default to +).
-  int sign = 1;
-  if (s->Get(i) == '-') {
-    sign = -1;
-    i++;
-  } else if (s->Get(i) == '+') {
-    i++;
-  }
-
-  // Compute the radix if 0.
-  if (radix == 0) {
-    radix = 10;
-    if (i < len && s->Get(i) == '0') {
-      radix = 8;
-      if (i + 1 < len) {
-        int c = s->Get(i + 1);
-        if (c == 'x' || c == 'X') {
-          radix = 16;
-          i += 2;
-        }
-      }
-    }
-  } else if (radix == 16) {
-    // Allow 0x or 0X prefix if radix is 16.
-    if (i + 1 < len && s->Get(i) == '0') {
-      int c = s->Get(i + 1);
-      if (c == 'x' || c == 'X') i += 2;
-    }
-  }
-
-  RUNTIME_ASSERT(2 <= radix && radix <= 36);
-  double value;
-  int end_index = StringToInt(s, i, radix, &value);
-  if (end_index != i) {
-    return Heap::NumberFromDouble(sign * value);
-  }
+  RUNTIME_ASSERT(radix == 0 || (2 <= radix && radix <= 36));
+  double value = StringToInt(s, radix);
+  return Heap::NumberFromDouble(value);
   return Heap::nan_value();
 }

=======================================

--- /branches/bleeding_edge/test/mjsunit/parse-int-float.js Wed Mar 3104:13:42 2010+++ /branches/bleeding_edge/test/mjsunit/parse-int-float.js Wed Mar 3110:19:05 2010

@@ -42,7 +42,10 @@
 assertEquals(0x12, parseInt('0x12', 16));
 assertEquals(0x12, parseInt('0x12', 16.1));
 assertEquals(0x12, parseInt('0x12', NaN));
-
+assertTrue(isNaN(parseInt('0x  ')));
+assertTrue(isNaN(parseInt('0x')));
+assertTrue(isNaN(parseInt('0x  ', 16)));
+assertTrue(isNaN(parseInt('0x', 16)));
 assertEquals(12, parseInt('12aaa'));

 assertEquals(0.1, parseFloat('0.1'));
@@ -51,6 +54,20 @@
 assertEquals(0, parseFloat('0x12'));
 assertEquals(77, parseFloat('077'));

+assertEquals(Infinity,parseInt('1000000000000000000000000000000000000000000000'++ '000000000000000000000000000000000000000000000000000000000000000000000000'++ '000000000000000000000000000000000000000000000000000000000000000000000000'++ '000000000000000000000000000000000000000000000000000000000000000000000000'++ '000000000000000000000000000000000000000000000000000000000000000000000000'

+    + '0000000000000'));
+

+assertEquals(Infinity,parseInt('0x10000000000000000000000000000000000000000000'++ '000000000000000000000000000000000000000000000000000000000000000000000000'++ '000000000000000000000000000000000000000000000000000000000000000000000000'++ '000000000000000000000000000000000000000000000000000000000000000000000000'++ '000000000000000000000000000000000000000000000000000000000000000000000000'

+    + '0000000000000'));
+

 var i;
 var y = 10;
=======================================

--- /branches/bleeding_edge/test/mjsunit/str-to-num.js Mon Mar 29 08:46:582010+++ /branches/bleeding_edge/test/mjsunit/str-to-num.js Wed Mar 31 10:19:052010

@@ -62,6 +62,10 @@
 assertEquals(123, toNumber("\t123\t"));
 assertEquals(123, toNumber("\f123\f"));

+assertEquals(16, toNumber(" 0x10 "));
+assertEquals(NaN, toNumber("0x"));
+assertEquals(NaN, toNumber("0x "));
+
 assertTrue(isNaN(toNumber(" NaN ")));
 assertEquals(Infinity,  toNumber(" Infinity ") ," Infinity");
 assertEquals(-Infinity, toNumber(" -Infinity "));

--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

To unsubscribe, reply using "remove me" as the subject.

[v8-dev] [v8] r4333 committed - StringToInt rewritten. This version doesn't allocate memory for long d...

Reply via email to