Revision: 5959
Author: cromwellian
Date: Thu Aug 13 12:21:36 2009
Log: Function Clustering, improves gzip compression by significant margin.  
Top-level block restructuring for IE7 is now done purely via a  
text-transformation, while in-method block restructuring is done via the  
AST. Block restructuring is only performed for IE permutations (or  
permutations with no user.agent specified).





http://code.google.com/p/google-web-toolkit/source/detail?r=5959

Added:
   
/trunk/dev/core/src/com/google/gwt/dev/jjs/impl/JsAbstractTextTransformer.java
  /trunk/dev/core/src/com/google/gwt/dev/jjs/impl/JsFunctionClusterer.java
   
/trunk/dev/core/src/com/google/gwt/dev/jjs/impl/JsIEBlockTextTransformer.java
Modified:
  /trunk/dev/core/src/com/google/gwt/dev/jjs/JavaToJavaScriptCompiler.java

=======================================
--- /dev/null
+++  
/trunk/dev/core/src/com/google/gwt/dev/jjs/impl/JsAbstractTextTransformer.java  
 
Thu Aug 13 12:21:36 2009
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2009 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may  
not
+ * use this file except in compliance with the License. You may obtain a  
copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,  
WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations  
under
+ * the License.
+ */
+package com.google.gwt.dev.jjs.impl;
+
+import com.google.gwt.core.ext.linker.StatementRanges;
+import com.google.gwt.core.ext.linker.impl.StandardStatementRanges;
+
+import java.util.ArrayList;
+
+/**
+ * Base class for transforming program text.
+ */
+public abstract class JsAbstractTextTransformer {
+
+  protected String js;
+
+  protected StatementRanges statementRanges;
+
+  public JsAbstractTextTransformer(String js, StatementRanges  
statementRanges) {
+    this.js = js;
+    this.statementRanges = statementRanges;
+  }
+
+  public JsAbstractTextTransformer(JsAbstractTextTransformer xformer) {
+    this.js = xformer.getJs();
+    this.statementRanges = xformer.getStatementRanges();
+  }
+
+  public abstract void exec();
+
+  protected void addStatement(String code, StringBuilder newJs,
+      ArrayList<Integer> starts, ArrayList<Integer> ends) {
+    beginStatement(newJs, starts);
+    newJs.append(code);
+    endStatement(newJs, ends);
+  }
+
+  protected void endStatement(StringBuilder newJs, ArrayList<Integer>  
ends) {
+    ends.add(newJs.length());
+  }
+
+  protected void beginStatement(StringBuilder newJs,
+      ArrayList<Integer> starts) {
+    starts.add(newJs.length());
+  }
+
+  public String getJs() {
+    return js;
+  }
+
+  public StatementRanges getStatementRanges() {
+    return statementRanges;
+  }
+
+  protected String getJsForRange(int stmtIndex) {
+    return js.substring(statementRanges.start(stmtIndex),
+        statementRanges.end(stmtIndex));
+  }
+
+  /**
+   * Dump functions and fragments back into a new JS string, and calculate  
a new
+   * StatementRanges object.
+   */
+  protected void recomputeJsAndStatementRanges(int[] stmtIndices) {
+
+    StringBuilder newJs = new StringBuilder();
+    ArrayList<Integer> starts = new ArrayList<Integer>();
+    ArrayList<Integer> ends = new ArrayList<Integer>();
+
+    beginStatements(newJs, starts, ends);
+    for (int i = 0; i < stmtIndices.length; i++) {
+      String code = getJsForRange(stmtIndices[i]);
+      addStatement(code, newJs, starts, ends);
+    }
+    endStatements(newJs, starts, ends);
+
+    assert
+        starts.size() == ends.size() :
+        "Size mismatch between start and" + " end statement ranges.";
+    assert starts.get(0) == 0 && ends.get(ends.size() - 1) == newJs
+        .length() : "statement ranges don't cover entire JS output  
string.";
+
+    StandardStatementRanges newRanges = new StandardStatementRanges(starts,
+        ends);
+    js = newJs.toString();
+    statementRanges = newRanges;
+  }
+
+  /**
+   * Called if any operations need to be performed before all statements  
have
+   * been processed.
+   */
+  protected void beginStatements(StringBuilder newJs, ArrayList<Integer>  
starts,
+      ArrayList<Integer> ends) {
+  }
+
+  /**
+   * Called if any operations need to be performed after all statements  
have
+   * been processed.
+   */
+  protected void endStatements(StringBuilder newJs, ArrayList<Integer>  
starts,
+      ArrayList<Integer> ends) {
+  }
+}
=======================================
--- /dev/null
+++  
/trunk/dev/core/src/com/google/gwt/dev/jjs/impl/JsFunctionClusterer.java        
 
Thu Aug 13 12:21:36 2009
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2009 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may  
not
+ * use this file except in compliance with the License. You may obtain a  
copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,  
WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations  
under
+ * the License.
+ */
+package com.google.gwt.dev.jjs.impl;
+
+import com.google.gwt.core.ext.linker.StatementRanges;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.LinkedList;
+import java.util.Iterator;
+
+/**
+ * Re-orders function declarations according to a given metric and  
clustering
+ * algorithm in order to boost gzip/deflation compression efficiency. This
+ * version uses the edit-distance algorithm as a metric, and a semi-greedy
+ * strategy for grouping functions together.
+ */
+public class JsFunctionClusterer extends JsAbstractTextTransformer {
+
+  /**
+   * Limit edit-distance search to MAX_DIST.
+   */
+  private static final int MAX_DIST = 10;
+
+  private static final int MAX_DISTANCE_LIMIT = 100;
+
+  private List<Integer> functionIndices;
+
+  private int[] clusteredIndices;
+
+  public JsFunctionClusterer(String js, StatementRanges statementRanges) {
+    super(js, statementRanges);
+  }
+
+  public JsFunctionClusterer(JsAbstractTextTransformer xformer) {
+    super(xformer);
+  }
+
+  public void exec() {
+    functionIndices = new LinkedList<Integer>();
+
+    // gather up all of the indices of function decl statements
+    for (int i = 0; i < statementRanges.numStatements(); i++) {
+      String code = getJsForRange(i);
+      if (code.startsWith("function")) {
+        functionIndices.add(i);
+      }
+    }
+
+    // sort the indices according to size of statement range
+    Collections.sort(functionIndices, new Comparator<Integer>() {
+      public int compare(Integer index1, Integer index2) {
+        return stmtSize(index1) - (stmtSize(index2));
+      }
+    });
+
+    // used to hold the new output order
+    clusteredIndices = new int[functionIndices.size()];
+    int currentFunction = 0;
+
+    // remove the first function and stick it in the output array
+    clusteredIndices[currentFunction] = functionIndices.get(0);
+    functionIndices.remove(0);
+
+    while (!functionIndices.isEmpty()) {
+
+      // get the last outputted function to match against
+      String currentCode =  
getJsForRange(clusteredIndices[currentFunction]);
+      int bestDistance = 99999;
+      int bestIndex = 0;
+      int bestFunction = 0;
+
+      Iterator<Integer> it = functionIndices.iterator();
+      int count = 0;
+      // search up to MAX_DIST functions for the best match
+      while (it.hasNext() &&
+             count < Math.min(MAX_DIST, functionIndices.size())) {
+        int functionIndex = it.next();
+        String testCode = getJsForRange(functionIndex);
+        int distanceLimit = Math.min(bestDistance, MAX_DISTANCE_LIMIT);
+        int dist = levdist(currentCode, testCode, distanceLimit);
+        if (dist < bestDistance) {
+          bestDistance = dist;
+          bestIndex = count;
+          bestFunction = functionIndex;
+        }
+        count++;
+      }
+      // output the best match and remove it from worklist of functions
+      currentFunction++;
+      clusteredIndices[currentFunction] = bestFunction;
+      functionIndices.remove(bestIndex);
+    }
+
+    recomputeJsAndStatementRanges(clusteredIndices);
+  }
+
+  @Override
+  protected void endStatements(StringBuilder newJs, ArrayList<Integer>  
starts,
+      ArrayList<Integer> ends) {
+    // Then output everything else that is not a function.
+    for (int i = 0; i < statementRanges.numStatements(); i++) {
+      String code = getJsForRange(i);
+      if (!code.startsWith("function")) {
+        addStatement(code, newJs, starts, ends);
+      }
+    }
+    super.endStatements(newJs, starts, ends);
+  }
+
+  /**
+   * Compute the Levenshtein distance between two strings, up to a distance
+   * limit.
+   */
+  private int levdist(String str1, String str2, int limit) {
+    if (str1.length() > str2.length()) {
+      return levdist(str2, str1, limit);
+    }
+    if (str1.length() == 0) {
+      return str2.length();
+    }
+    if (Math.abs(str1.length() - str2.length()) >= limit) {
+      return limit;
+    }
+
+    int str1len = str1.length();
+    int str2len = str2.length();
+
+    int lastRow[] = new int[str2len + 1];
+    int nextRow[] = new int[str2len + 1];
+
+    for (int j = 0; j <= Math.min(str2len, limit + 1); j++) {
+      lastRow[j] = j;
+    }
+
+    for (int i = 1; i <= str1len; i++) {
+      nextRow[0] = i;
+
+      if (i >= limit) {
+        nextRow[i - limit] = limit;
+      }
+      if (i >= limit + 1) {
+        nextRow[i - limit - 1] = limit;
+      }
+      if (i + limit <= str2len) {
+        nextRow[i + limit] = limit;
+      }
+      if (i + limit + 1 <= str2len) {
+        nextRow[i + limit + 1] = limit;
+      }
+
+      char c1 = str1.charAt(i - 1);
+
+      int j = Math.max(1, (i - limit + 1));
+      int jmax = Math.min(str2len, (i + limit - 1));
+
+      while (j <= jmax) {
+        char c2 = str2.charAt(j - 1);
+        int costSwap = c1 == c2 ? 0 : 1;
+        nextRow[j] = Math.min(Math.min(lastRow[j] + 1, nextRow[j - 1] + 1),
+            lastRow[j - 1] + costSwap);
+        j = j + 1;
+      }
+      int tmpRow[] = nextRow;
+      nextRow = lastRow;
+      lastRow = tmpRow;
+    }
+    return lastRow[Math.min(str2len, limit)];
+  }
+
+  private int stmtSize(int index1) {
+    return statementRanges.end(index1) - statementRanges.start(index1);
+  }
+}
=======================================
--- /dev/null
+++  
/trunk/dev/core/src/com/google/gwt/dev/jjs/impl/JsIEBlockTextTransformer.java   
 
Thu Aug 13 12:21:36 2009
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2009 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may  
not
+ * use this file except in compliance with the License. You may obtain a  
copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,  
WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations  
under
+ * the License.
+ */
+package com.google.gwt.dev.jjs.impl;
+
+import com.google.gwt.core.ext.linker.StatementRanges;
+
+import java.util.ArrayList;
+
+/**
+ * Limits top-level blocks to MAX_BLOCK_SIZE statements.
+ */
+public class JsIEBlockTextTransformer extends JsAbstractTextTransformer {
+
+  // uncomment to test
+
+  //  private static final int MAX_BLOCK_SIZE = 10;
+  private static final int MAX_BLOCK_SIZE = 1 << 15 - 1;
+
+  private boolean doSplits;
+
+  private int currentStatementCount;
+
+  public JsIEBlockTextTransformer(String js, StatementRanges  
statementRanges) {
+    super(js, statementRanges);
+  }
+
+  public JsIEBlockTextTransformer(JsAbstractTextTransformer xformer) {
+    super(xformer);
+  }
+
+  /**
+   * Do not perform clustering, only fix up IE7 block issue.
+   */
+  public void exec() {
+    doSplits = statementRanges.numStatements() > MAX_BLOCK_SIZE;
+    if (doSplits) {
+      int statementIndices[] = new int[statementRanges.numStatements()];
+      for (int i = 0; i < statementRanges.numStatements(); i++) {
+        statementIndices[i] = i;
+      }
+      recomputeJsAndStatementRanges(statementIndices);
+    }
+  }
+
+  /**
+   * Record start of statement, and optionally inject new open block.
+   */
+  protected void beginStatement(StringBuilder newJs,
+      ArrayList<Integer> starts) {
+    if (doSplits && currentStatementCount == 0) {
+      super.beginStatement(newJs, starts);
+      newJs.append('{');
+    } else if (!doSplits) {
+      super.beginStatement(newJs, starts);
+    }
+  }
+
+  @Override
+  protected void beginStatements(StringBuilder newJs, ArrayList<Integer>  
starts,
+      ArrayList<Integer> ends) {
+    super.beginStatements(newJs, starts, ends);
+    currentStatementCount = 0;
+  }
+
+  /**
+   * Record end of statement, and optionally inject close block, if block  
is
+   * full.
+   */
+  protected void endStatement(StringBuilder newJs, ArrayList<Integer>  
ends) {
+    currentStatementCount++;
+    if (doSplits && currentStatementCount == MAX_BLOCK_SIZE) {
+      newJs.append('}');
+      super.endStatement(newJs, ends);
+      currentStatementCount = 0;
+    } else if (!doSplits) {
+      super.endStatement(newJs, ends);
+    }
+  }
+
+  /**
+   * Used to close a trailing block which never filled.
+   */
+  @Override
+  protected void endStatements(StringBuilder newJs, ArrayList<Integer>  
starts,
+      ArrayList<Integer> ends) {
+    optionallyCloseLastBlock(newJs, ends);
+    super.endStatements(newJs, starts, ends);
+  }
+
+  /**
+   * Close last block if it never filled.
+   */
+  private void optionallyCloseLastBlock(StringBuilder newJs,
+      ArrayList<Integer> ends) {
+    if (doSplits && currentStatementCount > 1
+        && currentStatementCount < MAX_BLOCK_SIZE) {
+      newJs.append("}");
+      ends.add(newJs.length());
+    }
+  }
+}
=======================================
---  
/trunk/dev/core/src/com/google/gwt/dev/jjs/JavaToJavaScriptCompiler.java        
 
Tue Aug 11 12:45:07 2009
+++  
/trunk/dev/core/src/com/google/gwt/dev/jjs/JavaToJavaScriptCompiler.java        
 
Thu Aug 13 12:21:36 2009
@@ -16,14 +16,15 @@
  package com.google.gwt.dev.jjs;

  import com.google.gwt.core.ext.PropertyOracle;
+import com.google.gwt.core.ext.SelectionProperty;
  import com.google.gwt.core.ext.TreeLogger;
  import com.google.gwt.core.ext.UnableToCompleteException;
  import com.google.gwt.core.ext.linker.ArtifactSet;
  import com.google.gwt.core.ext.linker.StatementRanges;
  import com.google.gwt.core.ext.linker.SymbolData;
  import com.google.gwt.core.ext.linker.impl.StandardCompilationAnalysis;
-import com.google.gwt.core.ext.linker.impl.StandardSymbolData;
  import  
com.google.gwt.core.ext.linker.impl.StandardCompilationAnalysis.SoycArtifact;
+import com.google.gwt.core.ext.linker.impl.StandardSymbolData;
  import com.google.gwt.core.ext.soyc.Range;
  import com.google.gwt.core.ext.soyc.impl.DependencyRecorder;
  import com.google.gwt.core.ext.soyc.impl.SizeMapRecorder;
@@ -56,6 +57,7 @@
  import com.google.gwt.dev.jjs.impl.CastNormalizer;
  import com.google.gwt.dev.jjs.impl.CatchBlockNormalizer;
  import com.google.gwt.dev.jjs.impl.CodeSplitter;
+import  
com.google.gwt.dev.jjs.impl.CodeSplitter.MultipleDependencyGraphRecorder;
  import com.google.gwt.dev.jjs.impl.DeadCodeElimination;
  import com.google.gwt.dev.jjs.impl.EqualityNormalizer;
  import com.google.gwt.dev.jjs.impl.Finalizer;
@@ -65,6 +67,8 @@
  import com.google.gwt.dev.jjs.impl.GenerateJavaScriptAST;
  import com.google.gwt.dev.jjs.impl.JavaScriptObjectNormalizer;
  import com.google.gwt.dev.jjs.impl.JavaToJavaScriptMap;
+import com.google.gwt.dev.jjs.impl.JsFunctionClusterer;
+import com.google.gwt.dev.jjs.impl.JsIEBlockTextTransformer;
  import com.google.gwt.dev.jjs.impl.JsoDevirtualizer;
  import com.google.gwt.dev.jjs.impl.LongCastNormalizer;
  import com.google.gwt.dev.jjs.impl.LongEmulationNormalizer;
@@ -80,7 +84,6 @@
  import com.google.gwt.dev.jjs.impl.SourceGenerationVisitor;
  import com.google.gwt.dev.jjs.impl.TypeMap;
  import com.google.gwt.dev.jjs.impl.TypeTightener;
-import  
com.google.gwt.dev.jjs.impl.CodeSplitter.MultipleDependencyGraphRecorder;
  import com.google.gwt.dev.js.EvalFunctionsAtTopScope;
  import com.google.gwt.dev.js.JsBreakUpLargeVarStatements;
  import com.google.gwt.dev.js.JsIEBlockSizeVisitor;
@@ -316,7 +319,23 @@

        // Work around an IE7 bug,
        // http://code.google.com/p/google-web-toolkit/issues/detail?id=1440
-      JsIEBlockSizeVisitor.exec(jsProgram);
+      // note, JsIEBlockTextTransformer now handles restructuring top level
+      // blocks, this class now handles non-top level blocks only.
+      SelectionProperty userAgentProperty = null;
+      for (PropertyOracle oracle : propertyOracles) {
+        userAgentProperty =  
oracle.getSelectionProperty(logger, "user.agent");
+        if (userAgentProperty != null) {
+          break;
+        }
+      }
+      // if user agent is known or ie6, split overly large blocks
+      boolean splitBlocks = userAgentProperty == null || (
+          userAgentProperty != null &&
+              "ie6".equals(userAgentProperty.getCurrentValue()));
+
+      if (splitBlocks) {
+        JsIEBlockSizeVisitor.exec(jsProgram);
+      }
        JsBreakUpLargeVarStatements.exec(jsProgram, propertyOracles);

        // (12) Generate the final output text.
@@ -327,7 +346,7 @@
        List<Map<Range, SourceInfo>> sourceInfoMaps = options.isSoycExtra()
            ? new ArrayList<Map<Range, SourceInfo>>() : null;
        generateJavaScriptCode(options, jsProgram, map, js, ranges,
-          sizeBreakdowns, sourceInfoMaps);
+          sizeBreakdowns, sourceInfoMaps, splitBlocks);

        PermutationResult toReturn = new PermutationResultImpl(js,
            makeSymbolMap(symbolTable), ranges, permutationId);
@@ -820,11 +839,12 @@
     *          JavaScript
     * @param sourceInfoMaps An array to hold the source info maps for that
     *          JavaScript
+   * @param splitBlocks true if current permutation is for IE6 or unknown
     */
    private static void generateJavaScriptCode(JJSOptions options,
        JsProgram jsProgram, JavaToJavaScriptMap jjsMap, String[] js,
        StatementRanges[] ranges, SizeBreakdown[] sizeBreakdowns,
-      List<Map<Range, SourceInfo>> sourceInfoMaps) {
+      List<Map<Range, SourceInfo>> sourceInfoMaps, boolean splitBlocks) {
      for (int i = 0; i < js.length; i++) {
        DefaultTextOutput out = new DefaultTextOutput(
            options.getOutput().shouldMinimize());
@@ -835,14 +855,31 @@
          v = new JsSourceGenerationVisitorWithSizeBreakdown(out, jjsMap);
        }
        v.accept(jsProgram.getFragmentBlock(i));
-      js[i] = out.toString();
+
+      /**
+       * Reorder function decls to improve compression ratios. Also  
restructures
+       * the top level blocks into sub-blocks if they exceed 32767  
statements.
+       */
+      JsFunctionClusterer clusterer = new  
JsFunctionClusterer(out.toString(),
+          v.getStatementRanges());
+      // only cluster for obfuscated mode
+      if (options.getOutput() == JsOutputOption.OBFUSCATED) {
+        clusterer.exec();
+      }
+      // rewrite top-level blocks to limit the number of statements
+      JsIEBlockTextTransformer ieXformer = new JsIEBlockTextTransformer(
+          clusterer);
+      if (splitBlocks) {
+        ieXformer.exec();
+      }
+      js[i] = ieXformer.getJs();
        if (sizeBreakdowns != null) {
          sizeBreakdowns[i] = v.getSizeBreakdown();
        }
        if (sourceInfoMaps != null) {
          sourceInfoMaps.add(((JsReportGenerationVisitor)  
v).getSourceInfoMap());
        }
-      ranges[i] = v.getStatementRanges();
+      ranges[i] = ieXformer.getStatementRanges();
      }
    }


--~--~---------~--~----~------------~-------~--~----~
http://groups.google.com/group/Google-Web-Toolkit-Contributors
-~----------~----~----~----~------~----~------~--~---

Reply via email to