Revision: 2959
Author: [email protected]
Date: Wed Sep 23 05:32:24 2009
Log: * Remove non-Open Source code from Douglas Crockford.
* Be more var-correct in JS files.
* Rename some JS variables to reflect the fact that they are instance
variables on the global intrinsics object.
* Missing optimization in StringCharAt.
Review URL: http://codereview.chromium.org/215052
http://code.google.com/p/v8/source/detail?r=2959
Modified:
/branches/bleeding_edge/LICENSE
/branches/bleeding_edge/src/array.js
/branches/bleeding_edge/src/debug-delay.js
/branches/bleeding_edge/src/messages.js
/branches/bleeding_edge/src/mirror-delay.js
/branches/bleeding_edge/src/string.js
/branches/bleeding_edge/src/uri.js
/branches/bleeding_edge/tools/js2c.py
/branches/bleeding_edge/tools/jsmin.py
=======================================
--- /branches/bleeding_edge/LICENSE Tue Sep 15 04:51:40 2009
+++ /branches/bleeding_edge/LICENSE Wed Sep 23 05:32:24 2009
@@ -21,10 +21,6 @@
This code is copyrighted by Sun Microsystems Inc. and released
under a 3-clause BSD license.
- - JSMin JavaScript minifier, located at tools/jsmin.py. This code is
- copyrighted by Douglas Crockford and Baruch Even and released under
- an MIT license.
-
- Valgrind client API header, located at third_party/valgrind/valgrind.h
This is release under the BSD license.
=======================================
--- /branches/bleeding_edge/src/array.js Wed Jun 10 04:42:22 2009
+++ /branches/bleeding_edge/src/array.js Wed Sep 23 05:32:24 2009
@@ -708,6 +708,8 @@
QuickSort(a, from, low_end);
QuickSort(a, high_start, to);
}
+
+ var length;
// Copies elements in the range 0..length from obj's prototype chain
// to obj itself, if obj has holes. Returns one more than the maximal
index
@@ -826,7 +828,7 @@
return first_undefined;
}
- var length = ToUint32(this.length);
+ length = ToUint32(this.length);
if (length < 2) return this;
var is_array = IS_ARRAY(this);
=======================================
--- /branches/bleeding_edge/src/debug-delay.js Wed Aug 26 05:22:44 2009
+++ /branches/bleeding_edge/src/debug-delay.js Wed Sep 23 05:32:24 2009
@@ -25,8 +25,6 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// jsminify this file, js2c: jsmin
-
// Default number of frames to include in the response to backtrace
request.
const kDefaultBacktraceLength = 10;
@@ -35,7 +33,7 @@
// Regular expression to skip "crud" at the beginning of a source line
which is
// not really code. Currently the regular expression matches whitespace and
// comments.
-const sourceLineBeginningSkip = /^(?:[ \v\h]*(?:\/\*.*?\*\/)*)*/;
+const sourceLineBeginningSkip = /^(?:\s*(?:\/\*.*?\*\/)*)*/;
// Debug events which can occour in the V8 JavaScript engine. These
originate
// from the API include file debug.h.
=======================================
--- /branches/bleeding_edge/src/messages.js Tue Sep 15 04:51:40 2009
+++ /branches/bleeding_edge/src/messages.js Wed Sep 23 05:32:24 2009
@@ -32,6 +32,11 @@
var kVowelSounds = 0;
var kCapitalVowelSounds = 0;
+// If this object gets passed to an error constructor the error will
+// get an accessor for .message that constructs a descriptive error
+// message on access.
+var kAddMessageAccessorsMarker = { };
+
function GetInstanceName(cons) {
if (cons.length == 0) {
@@ -565,11 +570,6 @@
//
----------------------------------------------------------------------------
// Error implementation
-// If this object gets passed to an error constructor the error will
-// get an accessor for .message that constructs a descriptive error
-// message on access.
-var kAddMessageAccessorsMarker = { };
-
// Defines accessors for a property that is calculated the first time
// the property is read.
function DefineOneShotAccessor(obj, name, fun) {
@@ -781,14 +781,15 @@
}
for (var i = 0; i < frames.length; i++) {
var frame = frames[i];
+ var line;
try {
- var line = FormatSourcePosition(frame);
+ line = FormatSourcePosition(frame);
} catch (e) {
try {
- var line = "<error: " + e + ">";
+ line = "<error: " + e + ">";
} catch (ee) {
// Any code that reaches this point is seriously nasty!
- var line = "<error>";
+ line = "<error>";
}
}
lines.push(" at " + line);
=======================================
--- /branches/bleeding_edge/src/mirror-delay.js Tue Sep 8 03:20:28 2009
+++ /branches/bleeding_edge/src/mirror-delay.js Wed Sep 23 05:32:24 2009
@@ -25,8 +25,6 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// jsminify this file, js2c: jsmin
-
// Touch the RegExp and Date functions to make sure that date-delay.js and
// regexp-delay.js has been loaded. This is required as the mirrors use
// functions within these files through the builtins object.
=======================================
--- /branches/bleeding_edge/src/string.js Fri Jul 3 05:08:00 2009
+++ /branches/bleeding_edge/src/string.js Wed Sep 23 05:32:24 2009
@@ -62,7 +62,7 @@
// ECMA-262, section 15.5.4.4
function StringCharAt(pos) {
- var char_code = %_FastCharCodeAt(this, index);
+ var char_code = %_FastCharCodeAt(this, pos);
if (!%_IsSmi(char_code)) {
var subject = ToString(this);
var index = TO_INTEGER(pos);
@@ -182,6 +182,14 @@
}
return %StringSlice(string, start, end);
}
+
+
+// This has the same size as the lastMatchInfo array, and can be used for
+// functions that expect that structure to be returned. It is used when
the
+// needle is a string rather than a regexp. In this case we can't update
+// lastMatchArray without erroneously affecting the properties on the
global
+// RegExp object.
+var reusableMatchInfo = [2, "", "", -1, -1];
// ECMA-262, section 15.5.4.11
@@ -222,14 +230,6 @@
return builder.generate();
}
-
-
-// This has the same size as the lastMatchInfo array, and can be used for
-// functions that expect that structure to be returned. It is used when
the
-// needle is a string rather than a regexp. In this case we can't update
-// lastMatchArray without erroneously affecting the properties on the
global
-// RegExp object.
-var reusableMatchInfo = [2, "", "", -1, -1];
// Helper function for regular expressions in String.prototype.replace.
@@ -370,8 +370,8 @@
// 'abcd'.replace(/(.)/g, function() { return RegExp.$1; }
// should be 'abcd' and not 'dddd' (or anything else).
function StringReplaceRegExpWithFunction(subject, regexp, replace) {
- var lastMatchInfo = DoRegExpExec(regexp, subject, 0);
- if (IS_NULL(lastMatchInfo)) return subject;
+ var matchInfo = DoRegExpExec(regexp, subject, 0);
+ if (IS_NULL(matchInfo)) return subject;
var result = new ReplaceResultBuilder(subject);
// There's at least one match. If the regexp is global, we have to loop
@@ -382,11 +382,11 @@
if (regexp.global) {
var previous = 0;
do {
- result.addSpecialSlice(previous, lastMatchInfo[CAPTURE0]);
- var startOfMatch = lastMatchInfo[CAPTURE0];
- previous = lastMatchInfo[CAPTURE1];
- result.add(ApplyReplacementFunction(replace, lastMatchInfo,
subject));
- // Can't use lastMatchInfo any more from here, since the function
could
+ result.addSpecialSlice(previous, matchInfo[CAPTURE0]);
+ var startOfMatch = matchInfo[CAPTURE0];
+ previous = matchInfo[CAPTURE1];
+ result.add(ApplyReplacementFunction(replace, matchInfo, subject));
+ // Can't use matchInfo any more from here, since the function could
// overwrite it.
// Continue with the next match.
// Increment previous if we matched an empty string, as per ECMA-262
@@ -401,20 +401,20 @@
// Per ECMA-262 15.10.6.2, if the previous index is greater than the
// string length, there is no match
- lastMatchInfo = (previous > subject.length)
+ matchInfo = (previous > subject.length)
? null
: DoRegExpExec(regexp, subject, previous);
- } while (!IS_NULL(lastMatchInfo));
+ } while (!IS_NULL(matchInfo));
// Tack on the final right substring after the last match, if
necessary.
if (previous < subject.length) {
result.addSpecialSlice(previous, subject.length);
}
} else { // Not a global regexp, no need to loop.
- result.addSpecialSlice(0, lastMatchInfo[CAPTURE0]);
- var endOfMatch = lastMatchInfo[CAPTURE1];
- result.add(ApplyReplacementFunction(replace, lastMatchInfo, subject));
- // Can't use lastMatchInfo any more from here, since the function could
+ result.addSpecialSlice(0, matchInfo[CAPTURE0]);
+ var endOfMatch = matchInfo[CAPTURE1];
+ result.add(ApplyReplacementFunction(replace, matchInfo, subject));
+ // Can't use matchInfo any more from here, since the function could
// overwrite it.
result.addSpecialSlice(endOfMatch, subject.length);
}
@@ -424,20 +424,20 @@
// Helper function to apply a string replacement function once.
-function ApplyReplacementFunction(replace, lastMatchInfo, subject) {
+function ApplyReplacementFunction(replace, matchInfo, subject) {
// Compute the parameter list consisting of the match, captures, index,
// and subject for the replace function invocation.
- var index = lastMatchInfo[CAPTURE0];
+ var index = matchInfo[CAPTURE0];
// The number of captures plus one for the match.
- var m = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1;
+ var m = NUMBER_OF_CAPTURES(matchInfo) >> 1;
if (m == 1) {
- var s = CaptureString(subject, lastMatchInfo, 0);
+ var s = CaptureString(subject, matchInfo, 0);
// Don't call directly to avoid exposing the built-in global object.
return replace.call(null, s, index, subject);
}
var parameters = $Array(m + 2);
for (var j = 0; j < m; j++) {
- parameters[j] = CaptureString(subject, lastMatchInfo, j);
+ parameters[j] = CaptureString(subject, matchInfo, j);
}
parameters[j] = index;
parameters[j + 1] = subject;
@@ -539,14 +539,14 @@
return result;
}
- var lastMatchInfo = splitMatch(separator, subject, currentIndex,
startIndex);
-
- if (IS_NULL(lastMatchInfo)) {
+ var matchInfo = splitMatch(separator, subject, currentIndex,
startIndex);
+
+ if (IS_NULL(matchInfo)) {
result[result.length] = subject.slice(currentIndex, length);
return result;
}
- var endIndex = lastMatchInfo[CAPTURE1];
+ var endIndex = matchInfo[CAPTURE1];
// We ignore a zero-length match at the currentIndex.
if (startIndex === endIndex && endIndex === currentIndex) {
@@ -554,12 +554,12 @@
continue;
}
- result[result.length] = SubString(subject, currentIndex,
lastMatchInfo[CAPTURE0]);
+ result[result.length] = SubString(subject, currentIndex,
matchInfo[CAPTURE0]);
if (result.length === limit) return result;
- for (var i = 2; i < NUMBER_OF_CAPTURES(lastMatchInfo); i += 2) {
- var start = lastMatchInfo[CAPTURE(i)];
- var end = lastMatchInfo[CAPTURE(i + 1)];
+ for (var i = 2; i < NUMBER_OF_CAPTURES(matchInfo); i += 2) {
+ var start = matchInfo[CAPTURE(i)];
+ var end = matchInfo[CAPTURE(i + 1)];
if (start != -1 && end != -1) {
result[result.length] = SubString(subject, start, end);
} else {
@@ -574,16 +574,16 @@
// ECMA-262 section 15.5.4.14
-// Helper function used by split. This version returns the lastMatchInfo
+// Helper function used by split. This version returns the matchInfo
// instead of allocating a new array with basically the same information.
function splitMatch(separator, subject, current_index, start_index) {
if (IS_REGEXP(separator)) {
- var lastMatchInfo = DoRegExpExec(separator, subject, start_index);
- if (lastMatchInfo == null) return null;
+ var matchInfo = DoRegExpExec(separator, subject, start_index);
+ if (matchInfo == null) return null;
// Section 15.5.4.14 paragraph two says that we do not allow zero
length
// matches at the end of the string.
- if (lastMatchInfo[CAPTURE0] === subject.length) return null;
- return lastMatchInfo;
+ if (matchInfo[CAPTURE0] === subject.length) return null;
+ return matchInfo;
}
var separatorIndex = subject.indexOf(separator, start_index);
=======================================
--- /branches/bleeding_edge/src/uri.js Thu Aug 13 00:36:28 2009
+++ /branches/bleeding_edge/src/uri.js Wed Sep 23 05:32:24 2009
@@ -30,6 +30,11 @@
// Expect $String = global.String;
+// Lazily initialized.
+var hexCharArray = 0;
+var hexCharCodeArray = 0;
+
+
function URIAddEncodedOctetToBuffer(octet, result, index) {
result[index++] = 37; // Char code of '%'.
result[index++] = hexCharCodeArray[octet >> 4];
@@ -318,11 +323,6 @@
var string = ToString(component);
return Encode(string, unescapePredicate);
}
-
-
-// Lazily initialized.
-var hexCharArray = 0;
-var hexCharCodeArray = 0;
function HexValueOf(c) {
=======================================
--- /branches/bleeding_edge/tools/js2c.py Mon Aug 31 08:02:54 2009
+++ /branches/bleeding_edge/tools/js2c.py Wed Sep 23 05:32:24 2009
@@ -52,20 +52,6 @@
return lines
-def CompressScript(lines, do_jsmin):
- # If we're not expecting this code to be user visible, we can run it
through
- # a more aggressive minifier.
- if do_jsmin:
- return jsmin.jsmin(lines)
-
- # Remove stuff from the source that we don't want to appear when
- # people print the source code using Function.prototype.toString().
- # Note that we could easily compress the scripts mode but don't
- # since we want it to remain readable.
- lines = RemoveCommentsAndTrailingWhitespace(lines)
- return lines
-
-
def ReadFile(filename):
file = open(filename, "rt")
try:
@@ -295,16 +281,18 @@
# Build source code lines
source_lines = [ ]
+
+ minifier = jsmin.JavaScriptMinifier()
+
source_lines_empty = []
for module in modules:
filename = str(module)
delay = filename.endswith('-delay.js')
lines = ReadFile(filename)
- do_jsmin = lines.find('// jsminify this file, js2c: jsmin') != -1
lines = ExpandConstants(lines, consts)
lines = ExpandMacros(lines, macros)
Validate(lines, filename)
- lines = CompressScript(lines, do_jsmin)
+ lines = minifier.JSMinify(lines)
data = ToCArray(lines)
id = (os.path.split(filename)[1])[:-3]
if delay: id = id[:-6]
=======================================
--- /branches/bleeding_edge/tools/jsmin.py Wed Jan 28 02:15:11 2009
+++ /branches/bleeding_edge/tools/jsmin.py Wed Sep 23 05:32:24 2009
@@ -1,218 +1,278 @@
-#!/usr/bin/python
-
-# This code is original from jsmin by Douglas Crockford, it was translated
to
-# Python by Baruch Even. The original code had the following copyright and
-# license.
-#
-# /* jsmin.c
-# 2007-05-22
-#
-# Copyright (c) 2002 Douglas Crockford (www.crockford.com)
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
copy of
-# this software and associated documentation files (the "Software"), to
deal in
-# the Software without restriction, including without limitation the
rights to
-# use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies
-# of the Software, and to permit persons to whom the Software is furnished
to do
-# so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included
in all
-# copies or substantial portions of the Software.
-#
-# The Software shall be used for Good, not Evil.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE
-# SOFTWARE.
-# */
-
-from StringIO import StringIO
-
-def jsmin(js):
- ins = StringIO(js)
- outs = StringIO()
- JavascriptMinify().minify(ins, outs)
- str = outs.getvalue()
- if len(str) > 0 and str[0] == '\n':
- str = str[1:]
- return str
-
-def isAlphanum(c):
- """return true if the character is a letter, digit, underscore,
- dollar sign, or non-ASCII character.
+#!/usr/bin/python2.4
+
+# Copyright 2009 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""A JavaScript minifier.
+
+It is far from being a complete JS parser, so there are many valid
+JavaScript programs that will be ruined by it. Another strangeness is that
+it accepts $ and % as parts of identifiers. It doesn't merge lines or
strip
+out blank lines in order to ease debugging. Variables at the top scope are
+properties of the global object so we can't rename them. It is assumed
that
+you introduce variables with var as if JavaScript followed C++ scope rules
+around curly braces, so the declaration must be above the first use.
+
+Use as:
+import jsmin
+minifier = JavaScriptMinifier()
+program1 = minifier.JSMinify(program1)
+program2 = minifier.JSMinify(program2)
+"""
+
+import re
+
+
+class JavaScriptMinifier(object):
+ """An object that you can feed code snippets to to get them minified."""
+
+ def __init__(self):
+ # We prepopulate the list of identifiers that shouldn't be used. These
+ # short language keywords could otherwise be used by the script as
variable
+ # names.
+ self.seen_identifiers = {"do": True, "in": True}
+ self.identifier_counter = 0
+ self.in_comment = False
+ self.map = {}
+ self.nesting = 0
+
+ def LookAtIdentifier(self, m):
+ """Records identifiers or keywords that we see in use.
+
+ (So we can avoid renaming variables to these strings.)
+ Args:
+ m: The match object returned by re.search.
+
+ Returns:
+ Nothing.
"""
- return ((c >= 'a' and c <= 'z') or (c >= '0' and c <= '9') or
- (c >= 'A' and c <= 'Z') or c == '_' or c == '$' or c == '\\'
or (c is not None and ord(c) > 126));
-
-class UnterminatedComment(Exception):
- pass
-
-class UnterminatedStringLiteral(Exception):
- pass
-
-class UnterminatedRegularExpression(Exception):
- pass
-
-class JavascriptMinify(object):
-
- def _outA(self):
- self.outstream.write(self.theA)
- def _outB(self):
- self.outstream.write(self.theB)
-
- def _get(self):
- """return the next character from stdin. Watch out for lookahead.
If
- the character is a control character, translate it to a space or
- linefeed.
- """
- c = self.theLookahead
- self.theLookahead = None
- if c == None:
- c = self.instream.read(1)
- if c >= ' ' or c == '\n':
- return c
- if c == '': # EOF
- return '\000'
- if c == '\r':
- return '\n'
- return ' '
-
- def _peek(self):
- self.theLookahead = self._get()
- return self.theLookahead
-
- def _next(self):
- """get the next character, excluding comments. peek() is used to
see
- if an unescaped '/' is followed by a '/' or '*'.
- """
- c = self._get()
- if c == '/' and self.theA != '\\':
- p = self._peek()
- if p == '/':
- c = self._get()
- while c > '\n':
- c = self._get()
- return c
- if p == '*':
- c = self._get()
- while 1:
- c = self._get()
- if c == '*':
- if self._peek() == '/':
- self._get()
- return ' '
- if c == '\000':
- raise UnterminatedComment()
-
- return c
-
- def _action(self, action):
- """do something! What you do is determined by the argument:
- 1 Output A. Copy B to A. Get the next B.
- 2 Copy B to A. Get the next B. (Delete A).
- 3 Get the next B. (Delete B).
- action treats a string as a single character. Wow!
- action recognizes a regular expression if it is preceded by (
or , or =.
- """
- if action <= 1:
- self._outA()
-
- if action <= 2:
- self.theA = self.theB
- if self.theA == "'" or self.theA == '"':
- while 1:
- self._outA()
- self.theA = self._get()
- if self.theA == self.theB:
- break
- if self.theA <= '\n':
- raise UnterminatedStringLiteral()
- if self.theA == '\\':
- self._outA()
- self.theA = self._get()
-
-
- if action <= 3:
- self.theB = self._next()
- if self.theB == '/' and (self.theA == '(' or self.theA == ','
or
- self.theA == '=' or self.theA == ':'
or
- self.theA == '[' or self.theA == '?'
or
- self.theA == '!' or self.theA == '&'
or
- self.theA == '|' or self.theA == ';'
or
- self.theA == '{' or self.theA == '}'
or
- self.theA == '\n'):
- self._outA()
- self._outB()
- while 1:
- self.theA = self._get()
- if self.theA == '/':
- break
- elif self.theA == '\\':
- self._outA()
- self.theA = self._get()
- elif self.theA <= '\n':
- raise UnterminatedRegularExpression()
- self._outA()
- self.theB = self._next()
-
-
- def _jsmin(self):
- """Copy the input to the output, deleting the characters which are
- insignificant to JavaScript. Comments will be removed. Tabs
will be
- replaced with spaces. Carriage returns will be replaced with
linefeeds.
- Most spaces and linefeeds will be removed.
- """
- self.theA = '\n'
- self._action(3)
-
- while self.theA != '\000':
- if self.theA == ' ':
- if isAlphanum(self.theB):
- self._action(1)
- else:
- self._action(2)
- elif self.theA == '\n':
- if self.theB in ['{', '[', '(', '+', '-']:
- self._action(1)
- elif self.theB == ' ':
- self._action(3)
- else:
- if isAlphanum(self.theB):
- self._action(1)
- else:
- self._action(2)
- else:
- if self.theB == ' ':
- if isAlphanum(self.theA):
- self._action(1)
- else:
- self._action(3)
- elif self.theB == '\n':
- if self.theA in ['}', ']', ')', '+', '-', '"', '\'']:
- self._action(1)
- else:
- if isAlphanum(self.theA):
- self._action(1)
- else:
- self._action(3)
- else:
- self._action(1)
-
- def minify(self, instream, outstream):
- self.instream = instream
- self.outstream = outstream
- self.theA = '\n'
- self.theB = None
- self.theLookahead = None
-
- self._jsmin()
- self.instream.close()
-
-if __name__ == '__main__':
- import sys
- jsm = JavascriptMinify()
- jsm.minify(sys.stdin, sys.stdout)
+ identifier = m.group(1)
+ self.seen_identifiers[identifier] = True
+
+ def Push(self):
+ """Called when we encounter a '{'."""
+ self.nesting += 1
+
+ def Pop(self):
+ """Called when we encounter a '}'."""
+ self.nesting -= 1
+ # We treat each top-level opening brace as a single scope that can span
+ # several sets of nested braces.
+ if self.nesting == 0:
+ self.map = {}
+ self.identifier_counter = 0
+
+ def Declaration(self, m):
+ """Rewrites bits of the program selected by a regexp.
+
+ These can be curly braces, literal strings, function declarations and
var
+ declarations. (These last two must be on one line including the
opening
+ curly brace of the function for their variables to be renamed).
+
+ Args:
+ m: The match object returned by re.search.
+
+ Returns:
+ The string that should replace the match in the rewritten program.
+ """
+ matched_text = m.group(0)
+ if matched_text == "{":
+ self.Push()
+ return matched_text
+ if matched_text == "}":
+ self.Pop()
+ return matched_text
+ if re.match("[\"'/]", matched_text):
+ return matched_text
+ m = re.match(r"var ", matched_text)
+ if m:
+ var_names = matched_text[m.end():]
+ var_names = re.split(r",", var_names)
+ return "var " + ",".join(map(self.FindNewName, var_names))
+ m = re.match(r"(function\b[^(]*)\((.*)\)\{$", matched_text)
+ if m:
+ up_to_args = m.group(1)
+ args = m.group(2)
+ args = re.split(r",", args)
+ self.Push()
+ return up_to_args + "(" + ",".join(map(self.FindNewName, args))
+ "){"
+
+ if matched_text in self.map:
+ return self.map[matched_text]
+
+ return matched_text
+
+ def CharFromNumber(self, number):
+ """A single-digit base-52 encoding using a-zA-Z."""
+ if number < 26:
+ return chr(number + 97)
+ number -= 26
+ return chr(number + 65)
+
+ def FindNewName(self, var_name):
+ """Finds a new 1-character or 2-character name for a variable.
+
+ Enters it into the mapping table for this scope.
+
+ Args:
+ var_name: The name of the variable before renaming.
+
+ Returns:
+ The new name of the variable.
+ """
+ new_identifier = ""
+ # Variable names that end in _ are member variables of the global
object,
+ # so they can be visible from code in a different scope. We leave them
+ # alone.
+ if var_name in self.map:
+ return self.map[var_name]
+ if self.nesting == 0:
+ return var_name
+ while True:
+ identifier_first_char = self.identifier_counter % 52
+ identifier_second_char = self.identifier_counter / 52
+ new_identifier = self.CharFromNumber(identifier_first_char)
+ if identifier_second_char != 0:
+ new_identifier = (
+ self.CharFromNumber(identifier_second_char - 1) +
new_identifier)
+ self.identifier_counter += 1
+ if not new_identifier in self.seen_identifiers:
+ break
+
+ self.map[var_name] = new_identifier
+ return new_identifier
+
+ def RemoveSpaces(self, m):
+ """Returns literal strings unchanged, replaces other inputs with group
1.
+
+ Other inputs are replaced with the contents of capture 1. This is
either
+ a single space or an empty string.
+
+ Args:
+ m: The match object returned by re.search.
+
+ Returns:
+ The string that should be inserted instead of the matched text.
+ """
+ entire_match = m.group(0)
+ replacement = m.group(1)
+ if re.match(r"'.*'$", entire_match):
+ return entire_match
+ if re.match(r'".*"$', entire_match):
+ return entire_match
+ if re.match(r"/.+/$", entire_match):
+ return entire_match
+ return replacement
+
+ def JSMinify(self, text):
+ """The main entry point. Takes a text and returns a compressed
version.
+
+ The compressed version hopefully does the same thing. Line breaks are
+ preserved.
+
+ Args:
+ text: The text of the code snippet as a multiline string.
+
+ Returns:
+ The compressed text of the code snippet as a multiline string.
+ """
+ new_lines = []
+ for line in re.split(r"\n", text):
+ line = line.replace("\t", " ")
+ if self.in_comment:
+ m = re.search(r"\*/", line)
+ if m:
+ line = line[m.end():]
+ self.in_comment = False
+ else:
+ new_lines.append("")
+ continue
+
+ if not self.in_comment:
+ line = re.sub(r"/\*.*?\*/", " ", line)
+ line = re.sub(r"//.*", "", line)
+ m = re.search(r"/\*", line)
+ if m:
+ line = line[:m.start()]
+ self.in_comment = True
+
+ # Strip leading and trailing spaces.
+ line = re.sub(r"^ +", "", line)
+ line = re.sub(r" +$", "", line)
+ # A regexp that matches a literal string surrounded by "double
quotes".
+ # This regexp can handle embedded backslash-escaped characters
including
+ # embedded backslash-escaped double quotes.
+ double_quoted_string = r'"(?:[^"\\]|\\.)*"'
+ # A regexp that matches a literal string surrounded by 'single
quotes'.
+ single_quoted_string = r"'(?:[^'\\]|\\.)*'"
+ # A regexp that matches a regexp literal surrounded by /slashes/.
+ slash_quoted_regexp = r"/(?:[^/\\]|\\.)+/"
+ # Replace multiple spaces with a single space.
+ line = re.sub("|".join([double_quoted_string,
+ single_quoted_string,
+ slash_quoted_regexp,
+ "( )+"]),
+ self.RemoveSpaces,
+ line)
+ # Strip single spaces unless they have an identifier character both
before
+ # and after the space. % and $ are counted as identifier characters.
+ line = re.sub("|".join([double_quoted_string,
+ single_quoted_string,
+ slash_quoted_regexp,
+ r"(?<![a-zA-Z_0-9$%]) |
(?![a-zA-Z_0-9$%])()"]),
+ self.RemoveSpaces,
+ line)
+ # Collect keywords and identifiers that are already in use.
+ if self.nesting == 0:
+ re.sub(r"([a-zA-Z0-9_$%]+)", self.LookAtIdentifier, line)
+ function_declaration_regexp = (
+ r"\bfunction" # Function definition keyword...
+ r"( [\w$%]+)?" # ...optional function name...
+ r"\([\w$%,]+\)\{") # ...argument declarations.
+ # Unfortunately the keyword-value syntax { key:value } makes the key
look
+ # like a variable where in fact it is a literal string. We use the
+ # presence or absence of a question mark to try to distinguish
between
+ # this case and the ternary operator: "condition ? iftrue : iffalse".
+ if re.search(r"\?", line):
+ block_trailing_colon = r""
+ else:
+ block_trailing_colon = r"(?![:\w$%])"
+ # Variable use. Cannot follow a period or precede a colon.
+ variable_use_regexp = r"(?<![.\w$%])[\w$%]+" + block_trailing_colon
+ line = re.sub("|".join([double_quoted_string,
+ single_quoted_string,
+ slash_quoted_regexp,
+ r"\{", # Curly braces.
+ r"\}",
+ r"\bvar [\w$%,]+", # var declarations.
+ function_declaration_regexp,
+ variable_use_regexp]),
+ self.Declaration,
+ line)
+ new_lines.append(line)
+
+ return "\n".join(new_lines) + "\n"
--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---