[v8-dev] [v8] r2959 committed - * Remove non-Open Source code from Douglas Crockford....

codesite-noreply Wed, 23 Sep 2009 06:32:44 -0700

Revision: 2959
Author: [email protected]
Date: Wed Sep 23 05:32:24 2009
Log: * Remove non-Open Source code from Douglas Crockford.
* Be more var-correct in JS files.
* Rename some JS variables to reflect the fact that they are instance
   variables on the global intrinsics object.
* Missing optimization in StringCharAt.
Review URL: http://codereview.chromium.org/215052
http://code.google.com/p/v8/source/detail?r=2959


Modified:
  /branches/bleeding_edge/LICENSE
  /branches/bleeding_edge/src/array.js
  /branches/bleeding_edge/src/debug-delay.js
  /branches/bleeding_edge/src/messages.js
  /branches/bleeding_edge/src/mirror-delay.js
  /branches/bleeding_edge/src/string.js
  /branches/bleeding_edge/src/uri.js
  /branches/bleeding_edge/tools/js2c.py
  /branches/bleeding_edge/tools/jsmin.py

=======================================
--- /branches/bleeding_edge/LICENSE     Tue Sep 15 04:51:40 2009
+++ /branches/bleeding_edge/LICENSE     Wed Sep 23 05:32:24 2009
@@ -21,10 +21,6 @@
      This code is copyrighted by Sun Microsystems Inc. and released
      under a 3-clause BSD license.

-  - JSMin JavaScript minifier, located at tools/jsmin.py.  This code is
-    copyrighted by Douglas Crockford and Baruch Even and released under
-    an MIT license.
-
    - Valgrind client API header, located at third_party/valgrind/valgrind.h
      This is release under the BSD license.

=======================================
--- /branches/bleeding_edge/src/array.js        Wed Jun 10 04:42:22 2009
+++ /branches/bleeding_edge/src/array.js        Wed Sep 23 05:32:24 2009
@@ -708,6 +708,8 @@
      QuickSort(a, from, low_end);
      QuickSort(a, high_start, to);
    }
+
+  var length;

    // Copies elements in the range 0..length from obj's prototype chain
    // to obj itself, if obj has holes. Returns one more than the maximal  
index
@@ -826,7 +828,7 @@
      return first_undefined;
    }

-  var length = ToUint32(this.length);
+  length = ToUint32(this.length);
    if (length < 2) return this;

    var is_array = IS_ARRAY(this);
=======================================
--- /branches/bleeding_edge/src/debug-delay.js  Wed Aug 26 05:22:44 2009
+++ /branches/bleeding_edge/src/debug-delay.js  Wed Sep 23 05:32:24 2009
@@ -25,8 +25,6 @@
  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-// jsminify this file, js2c: jsmin
-
  // Default number of frames to include in the response to backtrace  
request.
  const kDefaultBacktraceLength = 10;

@@ -35,7 +33,7 @@
  // Regular expression to skip "crud" at the beginning of a source line  
which is
  // not really code. Currently the regular expression matches whitespace and
  // comments.
-const sourceLineBeginningSkip = /^(?:[ \v\h]*(?:\/\*.*?\*\/)*)*/;
+const sourceLineBeginningSkip = /^(?:\s*(?:\/\*.*?\*\/)*)*/;

  // Debug events which can occour in the V8 JavaScript engine. These  
originate
  // from the API include file debug.h.
=======================================
--- /branches/bleeding_edge/src/messages.js     Tue Sep 15 04:51:40 2009
+++ /branches/bleeding_edge/src/messages.js     Wed Sep 23 05:32:24 2009
@@ -32,6 +32,11 @@
  var kVowelSounds = 0;
  var kCapitalVowelSounds = 0;

+// If this object gets passed to an error constructor the error will
+// get an accessor for .message that constructs a descriptive error
+// message on access.
+var kAddMessageAccessorsMarker = { };
+

  function GetInstanceName(cons) {
    if (cons.length == 0) {
@@ -565,11 +570,6 @@
  //  
----------------------------------------------------------------------------
  // Error implementation

-// If this object gets passed to an error constructor the error will
-// get an accessor for .message that constructs a descriptive error
-// message on access.
-var kAddMessageAccessorsMarker = { };
-
  // Defines accessors for a property that is calculated the first time
  // the property is read.
  function DefineOneShotAccessor(obj, name, fun) {
@@ -781,14 +781,15 @@
    }
    for (var i = 0; i < frames.length; i++) {
      var frame = frames[i];
+    var line;
      try {
-      var line = FormatSourcePosition(frame);
+      line = FormatSourcePosition(frame);
      } catch (e) {
        try {
-        var line = "<error: " + e + ">";
+        line = "<error: " + e + ">";
        } catch (ee) {
          // Any code that reaches this point is seriously nasty!
-        var line = "<error>";
+        line = "<error>";
        }
      }
      lines.push("    at " + line);
=======================================
--- /branches/bleeding_edge/src/mirror-delay.js Tue Sep  8 03:20:28 2009
+++ /branches/bleeding_edge/src/mirror-delay.js Wed Sep 23 05:32:24 2009
@@ -25,8 +25,6 @@
  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-// jsminify this file, js2c: jsmin
-
  // Touch the RegExp and Date functions to make sure that date-delay.js and
  // regexp-delay.js has been loaded. This is required as the mirrors use
  // functions within these files through the builtins object.
=======================================
--- /branches/bleeding_edge/src/string.js       Fri Jul  3 05:08:00 2009
+++ /branches/bleeding_edge/src/string.js       Wed Sep 23 05:32:24 2009
@@ -62,7 +62,7 @@

  // ECMA-262, section 15.5.4.4
  function StringCharAt(pos) {
-  var char_code = %_FastCharCodeAt(this, index);
+  var char_code = %_FastCharCodeAt(this, pos);
    if (!%_IsSmi(char_code)) {
      var subject = ToString(this);
      var index = TO_INTEGER(pos);
@@ -182,6 +182,14 @@
    }
    return %StringSlice(string, start, end);
  }
+
+
+// This has the same size as the lastMatchInfo array, and can be used for
+// functions that expect that structure to be returned.  It is used when  
the
+// needle is a string rather than a regexp.  In this case we can't update
+// lastMatchInfo without erroneously affecting the properties on the global
+// RegExp object.
+var reusableMatchInfo = [2, "", "", -1, -1];


  // ECMA-262, section 15.5.4.11
@@ -222,14 +230,6 @@

    return builder.generate();
  }
-
-
-// This has the same size as the lastMatchInfo array, and can be used for
-// functions that expect that structure to be returned.  It is used when  
the
-// needle is a string rather than a regexp.  In this case we can't update
-// lastMatchArray without erroneously affecting the properties on the  
global
-// RegExp object.
-var reusableMatchInfo = [2, "", "", -1, -1];


  // Helper function for regular expressions in String.prototype.replace.
@@ -370,8 +370,8 @@
  //     'abcd'.replace(/(.)/g, function() { return RegExp.$1; }
  // should be 'abcd' and not 'dddd' (or anything else).
  function StringReplaceRegExpWithFunction(subject, regexp, replace) {
-  var lastMatchInfo = DoRegExpExec(regexp, subject, 0);
-  if (IS_NULL(lastMatchInfo)) return subject;
+  var matchInfo = DoRegExpExec(regexp, subject, 0);
+  if (IS_NULL(matchInfo)) return subject;

    var result = new ReplaceResultBuilder(subject);
    // There's at least one match.  If the regexp is global, we have to loop
@@ -382,11 +382,11 @@
    if (regexp.global) {
      var previous = 0;
      do {
-      result.addSpecialSlice(previous, lastMatchInfo[CAPTURE0]);
-      var startOfMatch = lastMatchInfo[CAPTURE0];
-      previous = lastMatchInfo[CAPTURE1];
-      result.add(ApplyReplacementFunction(replace, lastMatchInfo,  
subject));
-      // Can't use lastMatchInfo any more from here, since the function  
could
+      result.addSpecialSlice(previous, matchInfo[CAPTURE0]);
+      var startOfMatch = matchInfo[CAPTURE0];
+      previous = matchInfo[CAPTURE1];
+      result.add(ApplyReplacementFunction(replace, matchInfo, subject));
+      // Can't use matchInfo any more from here, since the function could
        // overwrite it.
        // Continue with the next match.
        // Increment previous if we matched an empty string, as per ECMA-262
@@ -401,20 +401,20 @@

        // Per ECMA-262 15.10.6.2, if the previous index is greater than the
        // string length, there is no match
-      lastMatchInfo = (previous > subject.length)
+      matchInfo = (previous > subject.length)
            ? null
            : DoRegExpExec(regexp, subject, previous);
-    } while (!IS_NULL(lastMatchInfo));
+    } while (!IS_NULL(matchInfo));

      // Tack on the final right substring after the last match, if  
necessary.
      if (previous < subject.length) {
        result.addSpecialSlice(previous, subject.length);
      }
    } else { // Not a global regexp, no need to loop.
-    result.addSpecialSlice(0, lastMatchInfo[CAPTURE0]);
-    var endOfMatch = lastMatchInfo[CAPTURE1];
-    result.add(ApplyReplacementFunction(replace, lastMatchInfo, subject));
-    // Can't use lastMatchInfo any more from here, since the function could
+    result.addSpecialSlice(0, matchInfo[CAPTURE0]);
+    var endOfMatch = matchInfo[CAPTURE1];
+    result.add(ApplyReplacementFunction(replace, matchInfo, subject));
+    // Can't use matchInfo any more from here, since the function could
      // overwrite it.
      result.addSpecialSlice(endOfMatch, subject.length);
    }
@@ -424,20 +424,20 @@


  // Helper function to apply a string replacement function once.
-function ApplyReplacementFunction(replace, lastMatchInfo, subject) {
+function ApplyReplacementFunction(replace, matchInfo, subject) {
    // Compute the parameter list consisting of the match, captures, index,
    // and subject for the replace function invocation.
-  var index = lastMatchInfo[CAPTURE0];
+  var index = matchInfo[CAPTURE0];
    // The number of captures plus one for the match.
-  var m = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1;
+  var m = NUMBER_OF_CAPTURES(matchInfo) >> 1;
    if (m == 1) {
-    var s = CaptureString(subject, lastMatchInfo, 0);
+    var s = CaptureString(subject, matchInfo, 0);
      // Don't call directly to avoid exposing the built-in global object.
      return replace.call(null, s, index, subject);
    }
    var parameters = $Array(m + 2);
    for (var j = 0; j < m; j++) {
-    parameters[j] = CaptureString(subject, lastMatchInfo, j);
+    parameters[j] = CaptureString(subject, matchInfo, j);
    }
    parameters[j] = index;
    parameters[j + 1] = subject;
@@ -539,14 +539,14 @@
        return result;
      }

-    var lastMatchInfo = splitMatch(separator, subject, currentIndex,  
startIndex);
-
-    if (IS_NULL(lastMatchInfo)) {
+    var matchInfo = splitMatch(separator, subject, currentIndex,  
startIndex);
+
+    if (IS_NULL(matchInfo)) {
        result[result.length] = subject.slice(currentIndex, length);
        return result;
      }

-    var endIndex = lastMatchInfo[CAPTURE1];
+    var endIndex = matchInfo[CAPTURE1];

      // We ignore a zero-length match at the currentIndex.
      if (startIndex === endIndex && endIndex === currentIndex) {
@@ -554,12 +554,12 @@
        continue;
      }

-    result[result.length] = SubString(subject, currentIndex,  
lastMatchInfo[CAPTURE0]);
+    result[result.length] = SubString(subject, currentIndex,  
matchInfo[CAPTURE0]);
      if (result.length === limit) return result;

-    for (var i = 2; i < NUMBER_OF_CAPTURES(lastMatchInfo); i += 2) {
-      var start = lastMatchInfo[CAPTURE(i)];
-      var end = lastMatchInfo[CAPTURE(i + 1)];
+    for (var i = 2; i < NUMBER_OF_CAPTURES(matchInfo); i += 2) {
+      var start = matchInfo[CAPTURE(i)];
+      var end = matchInfo[CAPTURE(i + 1)];
        if (start != -1 && end != -1) {
          result[result.length] = SubString(subject, start, end);
        } else {
@@ -574,16 +574,16 @@


  // ECMA-262 section 15.5.4.14
-// Helper function used by split.  This version returns the lastMatchInfo
+// Helper function used by split.  This version returns the matchInfo
  // instead of allocating a new array with basically the same information.
  function splitMatch(separator, subject, current_index, start_index) {
    if (IS_REGEXP(separator)) {
-    var lastMatchInfo = DoRegExpExec(separator, subject, start_index);
-    if (lastMatchInfo == null) return null;
+    var matchInfo = DoRegExpExec(separator, subject, start_index);
+    if (matchInfo == null) return null;
      // Section 15.5.4.14 paragraph two says that we do not allow zero  
length
      // matches at the end of the string.
-    if (lastMatchInfo[CAPTURE0] === subject.length) return null;
-    return lastMatchInfo;
+    if (matchInfo[CAPTURE0] === subject.length) return null;
+    return matchInfo;
    }

    var separatorIndex = subject.indexOf(separator, start_index);
=======================================
--- /branches/bleeding_edge/src/uri.js  Thu Aug 13 00:36:28 2009
+++ /branches/bleeding_edge/src/uri.js  Wed Sep 23 05:32:24 2009
@@ -30,6 +30,11 @@

  // Expect $String = global.String;

+// Lazily initialized.
+var hexCharArray = 0;
+var hexCharCodeArray = 0;
+
+
  function URIAddEncodedOctetToBuffer(octet, result, index) {
    result[index++] = 37; // Char code of '%'.
    result[index++] = hexCharCodeArray[octet >> 4];
@@ -318,11 +323,6 @@
    var string = ToString(component);
    return Encode(string, unescapePredicate);
  }
-
-
-// Lazily initialized.
-var hexCharArray = 0;
-var hexCharCodeArray = 0;


  function HexValueOf(c) {
=======================================
--- /branches/bleeding_edge/tools/js2c.py       Mon Aug 31 08:02:54 2009
+++ /branches/bleeding_edge/tools/js2c.py       Wed Sep 23 05:32:24 2009
@@ -52,20 +52,6 @@
    return lines


-def CompressScript(lines, do_jsmin):
-  # If we're not expecting this code to be user visible, we can run it  
through
-  # a more aggressive minifier.
-  if do_jsmin:
-    return jsmin.jsmin(lines)
-
-  # Remove stuff from the source that we don't want to appear when
-  # people print the source code using Function.prototype.toString().
-  # Note that we could easily compress the scripts mode but don't
-  # since we want it to remain readable.
-  lines = RemoveCommentsAndTrailingWhitespace(lines)
-  return lines
-
-
  def ReadFile(filename):
    file = open(filename, "rt")
    try:
@@ -295,16 +281,18 @@

    # Build source code lines
    source_lines = [ ]
+
+  minifier = jsmin.JavaScriptMinifier()
+
    source_lines_empty = []
    for module in modules:
      filename = str(module)
      delay = filename.endswith('-delay.js')
      lines = ReadFile(filename)
-    do_jsmin = lines.find('// jsminify this file, js2c: jsmin') != -1
      lines = ExpandConstants(lines, consts)
      lines = ExpandMacros(lines, macros)
      Validate(lines, filename)
-    lines = CompressScript(lines, do_jsmin)
+    lines = minifier.JSMinify(lines)
      data = ToCArray(lines)
      id = (os.path.split(filename)[1])[:-3]
      if delay: id = id[:-6]
=======================================
--- /branches/bleeding_edge/tools/jsmin.py      Wed Jan 28 02:15:11 2009
+++ /branches/bleeding_edge/tools/jsmin.py      Wed Sep 23 05:32:24 2009
@@ -1,218 +1,278 @@
-#!/usr/bin/python
-
-# This code is original from jsmin by Douglas Crockford, it was translated  
to
-# Python by Baruch Even. The original code had the following copyright and
-# license.
-#
-# /* jsmin.c
-#    2007-05-22
-#
-# Copyright (c) 2002 Douglas Crockford  (www.crockford.com)
-#
-# Permission is hereby granted, free of charge, to any person obtaining a  
copy of
-# this software and associated documentation files (the "Software"), to  
deal in
-# the Software without restriction, including without limitation the  
rights to
-# use, copy, modify, merge, publish, distribute, sublicense, and/or sell  
copies
-# of the Software, and to permit persons to whom the Software is furnished  
to do
-# so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included  
in all
-# copies or substantial portions of the Software.
-#
-# The Software shall be used for Good, not Evil.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS  
OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL  
THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING  
FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS  
IN THE
-# SOFTWARE.
-# */
-
-from StringIO import StringIO
-
-def jsmin(js):
-    ins = StringIO(js)
-    outs = StringIO()
-    JavascriptMinify().minify(ins, outs)
-    str = outs.getvalue()
-    if len(str) > 0 and str[0] == '\n':
-        str = str[1:]
-    return str
-
-def isAlphanum(c):
-    """return true if the character is a letter, digit, underscore,
-           dollar sign, or non-ASCII character.
+#!/usr/bin/python2.4
+
+# Copyright 2009 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
+#     * Neither the name of Google Inc. nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""A JavaScript minifier.
+
+It is far from being a complete JS parser, so there are many valid
+JavaScript programs that will be ruined by it.  Another strangeness is that
+it accepts $ and % as parts of identifiers.  It doesn't merge lines or  
strip
+out blank lines in order to ease debugging.  Variables at the top scope are
+properties of the global object so we can't rename them.  It is assumed  
that
+you introduce variables with var as if JavaScript followed C++ scope rules
+around curly braces, so the declaration must be above the first use.
+
+Use as:
+import jsmin
+minifier = jsmin.JavaScriptMinifier()
+program1 = minifier.JSMinify(program1)
+program2 = minifier.JSMinify(program2)
+"""
+
+import re
+
+
+class JavaScriptMinifier(object):
+  """An object that you can feed code snippets to to get them minified."""
+
+  def __init__(self):
+    # We prepopulate the list of identifiers that shouldn't be used.  These
+    # short language keywords could otherwise be used by the script as  
variable
+    # names.
+    self.seen_identifiers = {"do": True, "in": True}
+    self.identifier_counter = 0
+    self.in_comment = False
+    self.map = {}
+    self.nesting = 0
+
+  def LookAtIdentifier(self, m):
+    """Records identifiers or keywords that we see in use.
+
+    (So we can avoid renaming variables to these strings.)
+    Args:
+      m: The match object returned by re.search.
+
+    Returns:
+      Nothing.
      """
-    return ((c >= 'a' and c <= 'z') or (c >= '0' and c <= '9') or
-            (c >= 'A' and c <= 'Z') or c == '_' or c == '$' or c == '\\'  
or (c is not None and ord(c) > 126));
-
-class UnterminatedComment(Exception):
-    pass
-
-class UnterminatedStringLiteral(Exception):
-    pass
-
-class UnterminatedRegularExpression(Exception):
-    pass
-
-class JavascriptMinify(object):
-
-    def _outA(self):
-        self.outstream.write(self.theA)
-    def _outB(self):
-        self.outstream.write(self.theB)
-
-    def _get(self):
-        """return the next character from stdin. Watch out for lookahead.  
If
-           the character is a control character, translate it to a space or
-           linefeed.
-        """
-        c = self.theLookahead
-        self.theLookahead = None
-        if c == None:
-            c = self.instream.read(1)
-        if c >= ' ' or c == '\n':
-            return c
-        if c == '': # EOF
-            return '\000'
-        if c == '\r':
-            return '\n'
-        return ' '
-
-    def _peek(self):
-        self.theLookahead = self._get()
-        return self.theLookahead
-
-    def _next(self):
-        """get the next character, excluding comments. peek() is used to  
see
-           if an unescaped '/' is followed by a '/' or '*'.
-        """
-        c = self._get()
-        if c == '/' and self.theA != '\\':
-            p = self._peek()
-            if p == '/':
-                c = self._get()
-                while c > '\n':
-                    c = self._get()
-                return c
-            if p == '*':
-                c = self._get()
-                while 1:
-                    c = self._get()
-                    if c == '*':
-                        if self._peek() == '/':
-                            self._get()
-                            return ' '
-                    if c == '\000':
-                        raise UnterminatedComment()
-
-        return c
-
-    def _action(self, action):
-        """do something! What you do is determined by the argument:
-           1   Output A. Copy B to A. Get the next B.
-           2   Copy B to A. Get the next B. (Delete A).
-           3   Get the next B. (Delete B).
-           action treats a string as a single character. Wow!
-           action recognizes a regular expression if it is preceded by (  
or , or =.
-        """
-        if action <= 1:
-            self._outA()
-
-        if action <= 2:
-            self.theA = self.theB
-            if self.theA == "'" or self.theA == '"':
-                while 1:
-                    self._outA()
-                    self.theA = self._get()
-                    if self.theA == self.theB:
-                        break
-                    if self.theA <= '\n':
-                        raise UnterminatedStringLiteral()
-                    if self.theA == '\\':
-                        self._outA()
-                        self.theA = self._get()
-
-
-        if action <= 3:
-            self.theB = self._next()
-            if self.theB == '/' and (self.theA == '(' or self.theA == ','  
or
-                                     self.theA == '=' or self.theA == ':'  
or
-                                     self.theA == '[' or self.theA == '?'  
or
-                                     self.theA == '!' or self.theA == '&'  
or
-                                     self.theA == '|' or self.theA == ';'  
or
-                                     self.theA == '{' or self.theA == '}'  
or
-                                     self.theA == '\n'):
-                self._outA()
-                self._outB()
-                while 1:
-                    self.theA = self._get()
-                    if self.theA == '/':
-                        break
-                    elif self.theA == '\\':
-                        self._outA()
-                        self.theA = self._get()
-                    elif self.theA <= '\n':
-                        raise UnterminatedRegularExpression()
-                    self._outA()
-                self.theB = self._next()
-
-
-    def _jsmin(self):
-        """Copy the input to the output, deleting the characters which are
-           insignificant to JavaScript. Comments will be removed. Tabs  
will be
-           replaced with spaces. Carriage returns will be replaced with  
linefeeds.
-           Most spaces and linefeeds will be removed.
-        """
-        self.theA = '\n'
-        self._action(3)
-
-        while self.theA != '\000':
-            if self.theA == ' ':
-                if isAlphanum(self.theB):
-                    self._action(1)
-                else:
-                    self._action(2)
-            elif self.theA == '\n':
-                if self.theB in ['{', '[', '(', '+', '-']:
-                    self._action(1)
-                elif self.theB == ' ':
-                    self._action(3)
-                else:
-                    if isAlphanum(self.theB):
-                        self._action(1)
-                    else:
-                        self._action(2)
-            else:
-                if self.theB == ' ':
-                    if isAlphanum(self.theA):
-                        self._action(1)
-                    else:
-                        self._action(3)
-                elif self.theB == '\n':
-                    if self.theA in ['}', ']', ')', '+', '-', '"', '\'']:
-                        self._action(1)
-                    else:
-                        if isAlphanum(self.theA):
-                            self._action(1)
-                        else:
-                            self._action(3)
-                else:
-                    self._action(1)
-
-    def minify(self, instream, outstream):
-        self.instream = instream
-        self.outstream = outstream
-        self.theA = '\n'
-        self.theB = None
-        self.theLookahead = None
-
-        self._jsmin()
-        self.instream.close()
-
-if __name__ == '__main__':
-    import sys
-    jsm = JavascriptMinify()
-    jsm.minify(sys.stdin, sys.stdout)
+    identifier = m.group(1)
+    self.seen_identifiers[identifier] = True
+
+  def Push(self):
+    """Called when we encounter a '{'."""
+    self.nesting += 1
+
+  def Pop(self):
+    """Called when we encounter a '}'."""
+    self.nesting -= 1
+    # We treat each top-level opening brace as a single scope that can span
+    # several sets of nested braces.
+    if self.nesting == 0:
+      self.map = {}
+      self.identifier_counter = 0
+
+  def Declaration(self, m):
+    """Rewrites bits of the program selected by a regexp.
+
+    These can be curly braces, literal strings, function declarations and  
var
+    declarations.  (These last two must be on one line including the  
opening
+    curly brace of the function for their variables to be renamed).
+
+    Args:
+      m: The match object returned by re.search.
+
+    Returns:
+      The string that should replace the match in the rewritten program.
+    """
+    matched_text = m.group(0)
+    if matched_text == "{":
+      self.Push()
+      return matched_text
+    if matched_text == "}":
+      self.Pop()
+      return matched_text
+    if re.match("[\"'/]", matched_text):
+      return matched_text
+    m = re.match(r"var ", matched_text)
+    if m:
+      var_names = matched_text[m.end():]
+      var_names = re.split(r",", var_names)
+      return "var " + ",".join(map(self.FindNewName, var_names))
+    m = re.match(r"(function\b[^(]*)\((.*)\)\{$", matched_text)
+    if m:
+      up_to_args = m.group(1)
+      args = m.group(2)
+      args = re.split(r",", args)
+      self.Push()
+      return up_to_args + "(" + ",".join(map(self.FindNewName, args))  
+ "){"
+
+    if matched_text in self.map:
+      return self.map[matched_text]
+
+    return matched_text
+
+  def CharFromNumber(self, number):
+    """A single-digit base-52 encoding using a-zA-Z."""
+    if number < 26:
+      return chr(number + 97)
+    number -= 26
+    return chr(number + 65)
+
+  def FindNewName(self, var_name):
+    """Finds a new 1-character or 2-character name for a variable.
+
+    Enters it into the mapping table for this scope.
+
+    Args:
+      var_name: The name of the variable before renaming.
+
+    Returns:
+      The new name of the variable.
+    """
+    new_identifier = ""
+    # Variables declared at the top level (nesting 0) are properties of the
+    # global object, so they can be visible from code in a different scope.
+    # We leave them alone.
+    if var_name in self.map:
+      return self.map[var_name]
+    if self.nesting == 0:
+      return var_name
+    while True:
+      identifier_first_char = self.identifier_counter % 52
+      identifier_second_char = self.identifier_counter / 52
+      new_identifier = self.CharFromNumber(identifier_first_char)
+      if identifier_second_char != 0:
+        new_identifier = (
+            self.CharFromNumber(identifier_second_char - 1) +  
new_identifier)
+      self.identifier_counter += 1
+      if not new_identifier in self.seen_identifiers:
+        break
+
+    self.map[var_name] = new_identifier
+    return new_identifier
+
+  def RemoveSpaces(self, m):
+    """Returns literal strings unchanged, replaces other inputs with group  
1.
+
+    Other inputs are replaced with the contents of capture 1.  This is  
either
+    a single space or an empty string.
+
+    Args:
+      m: The match object returned by re.search.
+
+    Returns:
+      The string that should be inserted instead of the matched text.
+    """
+    entire_match = m.group(0)
+    replacement = m.group(1)
+    if re.match(r"'.*'$", entire_match):
+      return entire_match
+    if re.match(r'".*"$', entire_match):
+      return entire_match
+    if re.match(r"/.+/$", entire_match):
+      return entire_match
+    return replacement
+
+  def JSMinify(self, text):
+    """The main entry point.  Takes a text and returns a compressed  
version.
+
+    The compressed version hopefully does the same thing.  Line breaks are
+    preserved.
+
+    Args:
+      text: The text of the code snippet as a multiline string.
+
+    Returns:
+      The compressed text of the code snippet as a multiline string.
+    """
+    new_lines = []
+    for line in re.split(r"\n", text):
+      line = line.replace("\t", " ")
+      if self.in_comment:
+        m = re.search(r"\*/", line)
+        if m:
+          line = line[m.end():]
+          self.in_comment = False
+        else:
+          new_lines.append("")
+          continue
+
+      if not self.in_comment:
+        line = re.sub(r"/\*.*?\*/", " ", line)
+        line = re.sub(r"//.*", "", line)
+        m = re.search(r"/\*", line)
+        if m:
+          line = line[:m.start()]
+          self.in_comment = True
+
+      # Strip leading and trailing spaces.
+      line = re.sub(r"^ +", "", line)
+      line = re.sub(r" +$", "", line)
+      # A regexp that matches a literal string surrounded by "double  
quotes".
+      # This regexp can handle embedded backslash-escaped characters  
including
+      # embedded backslash-escaped double quotes.
+      double_quoted_string = r'"(?:[^"\\]|\\.)*"'
+      # A regexp that matches a literal string surrounded by 'single quotes'.
+      single_quoted_string = r"'(?:[^'\\]|\\.)*'"
+      # A regexp that matches a regexp literal surrounded by /slashes/.
+      slash_quoted_regexp = r"/(?:[^/\\]|\\.)+/"
+      # Replace multiple spaces with a single space.
+      line = re.sub("|".join([double_quoted_string,
+                              single_quoted_string,
+                              slash_quoted_regexp,
+                              "( )+"]),
+                    self.RemoveSpaces,
+                    line)
+      # Strip single spaces unless they have an identifier character both  
before
+      # and after the space.  % and $ are counted as identifier characters.
+      line = re.sub("|".join([double_quoted_string,
+                              single_quoted_string,
+                              slash_quoted_regexp,
+                              r"(?<![a-zA-Z_0-9$%]) |  
(?![a-zA-Z_0-9$%])()"]),
+                    self.RemoveSpaces,
+                    line)
+      # Collect keywords and identifiers that are already in use.
+      if self.nesting == 0:
+        re.sub(r"([a-zA-Z0-9_$%]+)", self.LookAtIdentifier, line)
+      function_declaration_regexp = (
+          r"\bfunction"              # Function definition keyword...
+          r"( [\w$%]+)?"             # ...optional function name...
+          r"\([\w$%,]+\)\{")         # ...argument declarations.
+      # Unfortunately the keyword-value syntax { key:value } makes the key  
look
+      # like a variable where in fact it is a literal string.  We use the
+      # presence or absence of a question mark to try to distinguish  
between
+      # this case and the ternary operator: "condition ? iftrue : iffalse".
+      if re.search(r"\?", line):
+        block_trailing_colon = r""
+      else:
+        block_trailing_colon = r"(?![:\w$%])"
+      # Variable use.  Cannot follow a period or precede a colon.
+      variable_use_regexp = r"(?<![.\w$%])[\w$%]+" + block_trailing_colon
+      line = re.sub("|".join([double_quoted_string,
+                              single_quoted_string,
+                              slash_quoted_regexp,
+                              r"\{",                  # Curly braces.
+                              r"\}",
+                              r"\bvar [\w$%,]+",      # var declarations.
+                              function_declaration_regexp,
+                              variable_use_regexp]),
+                    self.Declaration,
+                    line)
+      new_lines.append(line)
+
+    return "\n".join(new_lines) + "\n"

--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

[v8-dev] [v8] r2959 committed - * Remove non-Open Source code from Douglas Crockford....

Reply via email to