Author: mrglavas
Date: Wed Jul  4 19:56:10 2012
New Revision: 1357381

URL: http://svn.apache.org/viewvc?rev=1357381&view=rev
Log:
Enhancements to the hashing algorithms used in internal data structures within 
Xerces to make them more resistant to collisions. To improve the distribution, a 
new hash function is randomly selected each time the number of items in a 
bucket exceeds a threshold.
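
For reference, the approach amounts to the following minimal sketch (the class and
method names here are illustrative only, not the committed API; in the commit the
logic is spread across SymbolTable, SymbolHash, XMLAttributesImpl, DTDGrammar and
the new PrimeNumberSequenceGenerator helper):

    import java.util.Random;

    // Sketch only: the table keeps an optional array of multipliers. Hashing falls
    // back to String.hashCode() until a bucket chain exceeds the collision
    // threshold, at which point new multipliers are drawn and the table is rehashed.
    final class RandomizedStringHasher {

        private static final int MULTIPLIERS_SIZE = 1 << 5;        // 32 multipliers
        private static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1;
        private static final int[] SMALL_PRIMES = {3, 5, 7, 11, 13, 17, 19, 23, 29, 31};

        private int[] multipliers; // null => use the default String.hashCode()

        int hash(String symbol) {
            if (multipliers == null) {
                return symbol.hashCode() & 0x7FFFFFFF;
            }
            int code = 0;
            for (int i = 0; i < symbol.length(); ++i) {
                // Same shape as String.hashCode(), but the per-position multiplier
                // cycles through a randomly chosen sequence of small primes.
                code = code * multipliers[i & MULTIPLIERS_MASK] + symbol.charAt(i);
            }
            return code & 0x7FFFFFFF;
        }

        // Called when a bucket exceeds the collision threshold; the owning table
        // then rehashes all of its entries under the new function.
        void selectNewHashFunction() {
            if (multipliers == null) {
                multipliers = new int[MULTIPLIERS_SIZE];
            }
            Random r = new Random();
            for (int i = 0; i < multipliers.length; ++i) {
                multipliers[i] = SMALL_PRIMES[r.nextInt(SMALL_PRIMES.length)];
            }
        }
    }

Because the multipliers are drawn at random, colliding keys precomputed against the
default String.hashCode() stop colliding once a bucket chain grows past the threshold
and the table is rehashed under the freshly selected function.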

Added:
    xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java   (with props)
Modified:
    xerces/java/trunk/src/org/apache/xerces/impl/dtd/DTDGrammar.java
    xerces/java/trunk/src/org/apache/xerces/util/SoftReferenceSymbolTable.java
    xerces/java/trunk/src/org/apache/xerces/util/SymbolHash.java
    xerces/java/trunk/src/org/apache/xerces/util/SymbolTable.java
    xerces/java/trunk/src/org/apache/xerces/util/XMLAttributesImpl.java

Modified: xerces/java/trunk/src/org/apache/xerces/impl/dtd/DTDGrammar.java
URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/impl/dtd/DTDGrammar.java?rev=1357381&r1=1357380&r2=1357381&view=diff
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/impl/dtd/DTDGrammar.java (original)
+++ xerces/java/trunk/src/org/apache/xerces/impl/dtd/DTDGrammar.java Wed Jul  4 19:56:10 2012
@@ -19,6 +19,7 @@ package org.apache.xerces.impl.dtd;
 
 import java.util.ArrayList;
 import java.util.Hashtable;
+import java.util.Random;
 
 import org.apache.xerces.impl.dtd.models.CMAny;
 import org.apache.xerces.impl.dtd.models.CMBinOp;
@@ -2639,6 +2640,26 @@ public class DTDGrammar 
      * @author Andy Clark, IBM
      */
     protected static final class QNameHashtable {
+        
+        private static final class PrimeNumberSequenceGenerator {
+            
+            private static int [] PRIMES = {
+                3,   5,   7,  11,  13,  17,  19,  23,  29,  31,  37,  41,  43,  47,  53,  59, 
+               61,  67,  71,  73,  79,  83,  89,  97, 101, 103, 107, 109, 113, 127, 131, 137, 
+              139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 
+              229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 
+              317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 
+              421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 
+              521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617, 
+              619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709, 719, 727};
+            
+            static void generateSequence(int[] arrayToFill) {
+                Random r = new Random();
+                for (int i = 0; i < arrayToFill.length; ++i) {
+                    arrayToFill[i] = PRIMES[r.nextInt(PRIMES.length)];
+                }  
+            }
+        }
     
         //
         // Constants
@@ -2651,11 +2672,29 @@ public class DTDGrammar 
         //       that we get a better distribution for hashing. -Ac
         /** Hashtable size (101). */
         private static final int HASHTABLE_SIZE = 101;
+        
+        /** Maximum hash collisions per bucket for a table with load factor == 1. */
+        private static final int MAX_HASH_COLLISIONS = 40;
+        
+        private static final int MULTIPLIERS_SIZE = 1 << 5;
+        private static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1;
 
         //
         // Data
         //
         private Object[][] fHashTable = new Object[HASHTABLE_SIZE][];
+        
+        /** actual table size **/
+        private int fTableSize = HASHTABLE_SIZE;
+
+        /** The total number of entries in the hash table. */
+        private int fCount = 0;
+        
+        /**
+         * Array of randomly selected hash function multipliers or <code>null</code>
+         * if the default String.hashCode() function should be used.
+         */
+        private int[] fHashMultipliers;
 
         //
         // Public methods
@@ -2663,7 +2702,7 @@ public class DTDGrammar 
         /** Associates the given value with the specified key tuple. */
         public void put(String key, int value) {
 
-            int hash = (key.hashCode() & 0x7FFFFFFF) % HASHTABLE_SIZE;
+            int hash = (hash(key) & 0x7FFFFFFF) % fTableSize;
             Object[] bucket = fHashTable[hash];
 
             if (bucket == null) {
@@ -2672,6 +2711,11 @@ public class DTDGrammar 
                 bucket[1] = key;
                 bucket[2] = new int[]{value};
                 fHashTable[hash] = bucket;
+                if (++fCount > fTableSize) {
+                    // Rehash the table if the number of entries
+                    // would exceed the number of buckets.
+                    rehash();
+                }
             } else {
                 int count = ((int[])bucket[0])[0];
                 int offset = 1 + 2*count;
@@ -2692,10 +2736,20 @@ public class DTDGrammar 
                     }
                     j += 2;
                 }
-                if (! found) {
+                if (!found) {
                     bucket[offset++] = key;
                     bucket[offset]= new int[]{value};
                     ((int[])bucket[0])[0] = ++count;
+                    if (++fCount > fTableSize) {
+                        // Rehash the table if the number of entries
+                        // would exceed the number of buckets.
+                        rehash();
+                    }
+                    else if (count > MAX_HASH_COLLISIONS) {
+                        // Select a new hash function and rehash the table if
+                        // MAX_HASH_COLLISIONS is exceeded.
+                        rebalance();
+                    }
                 }
 
             }
@@ -2706,7 +2760,7 @@ public class DTDGrammar 
 
         /** Returns the value associated with the specified key tuple. */
         public int get(String key) {
-            int hash = (key.hashCode() & 0x7FFFFFFF) % HASHTABLE_SIZE;
+            int hash = (hash(key) & 0x7FFFFFFF) % fTableSize;
             Object[] bucket = fHashTable[hash];
 
             if (bucket == null) {
@@ -2724,6 +2778,93 @@ public class DTDGrammar 
             return -1;
 
         } // get(int,String,String)
+        
+        public int hash(String symbol) {
+            if (fHashMultipliers == null) {
+                return symbol.hashCode();
+            }
+            return hash0(symbol);
+        } // hash(String):int
+        
+        private int hash0(String symbol) {
+            int code = 0;
+            final int length = symbol.length();
+            final int[] multipliers = fHashMultipliers;
+            for (int i = 0; i < length; ++i) {
+                code = code * multipliers[i & MULTIPLIERS_MASK] + symbol.charAt(i);
+            }
+            return code;
+        } // hash0(String):int
+        
+        private void rehash() {
+            rehashCommon(fHashTable.length * 2 + 1);
+        } // rehash()
+        
+        private void rebalance() {
+            if (fHashMultipliers == null) {
+                fHashMultipliers = new int[MULTIPLIERS_SIZE];
+            }
+            PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers);
+            rehashCommon(fHashTable.length);
+        } // rebalance()
+        
+        private void rehashCommon(final int newCapacity) {
+            
+            final int oldCapacity = fHashTable.length;
+            final Object[][] oldTable = fHashTable;
+
+            final Object[][] newTable = new Object[newCapacity][];
+            
+            fHashTable = newTable;
+            fTableSize = fHashTable.length;
+            
+            for (int i = 0; i < oldCapacity; ++i) {
+                final Object[] oldBucket = oldTable[i];
+                if (oldBucket != null) {
+                    final int oldCount = ((int[]) oldBucket[0])[0];
+                    boolean oldBucketReused = false;
+                    int k = 1;
+                    for (int j = 0; j < oldCount; ++j) {
+                        final String key = (String) oldBucket[k];
+                        final Object value = oldBucket[k+1];
+                        
+                        final int hash = (hash(key) & 0x7FFFFFFF) % fTableSize;
+                        Object[] bucket = fHashTable[hash];
+                        
+                        if (bucket == null) {
+                            if (oldBucketReused) {
+                                bucket = new Object[1 + 2*INITIAL_BUCKET_SIZE];
+                                bucket[0] = new int[]{1};
+                            }
+                            else {
+                                bucket = oldBucket;
+                                ((int[])bucket[0])[0] = 1;
+                                oldBucketReused = true;
+                            }
+                            bucket[1] = key;
+                            bucket[2] = value;
+                            fHashTable[hash] = bucket;
+                        }
+                        else {
+                            int count = ((int[])bucket[0])[0];
+                            int offset = 1 + 2*count;
+                            if (offset == bucket.length) {
+                                int newSize = count + INITIAL_BUCKET_SIZE;
+                                Object[] newBucket = new Object[1 + 2*newSize];
+                                System.arraycopy(bucket, 0, newBucket, 0, offset);
+                                bucket = newBucket;
+                                fHashTable[hash] = bucket;
+                            }
+                            bucket[offset++] = key;
+                            bucket[offset]= value;
+                            ((int[])bucket[0])[0] = ++count;
+                        }
+                        k += 2;
+                    }
+                } 
+            }
+            
+        } // rehashCommon(int)
 
     }  // class QNameHashtable
 

Added: xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java
URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java?rev=1357381&view=auto
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java (added)
+++ xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java Wed Jul  4 19:56:10 2012
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.xerces.util;
+
+import java.util.Random;
+
+/**
+ * @version $Id$
+ */
+final class PrimeNumberSequenceGenerator {
+    
+    private static int [] PRIMES = {
+        3,   5,   7,  11,  13,  17,  19,  23,  29,  31,  37,  41,  43,  47,  53,  59, 
+       61,  67,  71,  73,  79,  83,  89,  97, 101, 103, 107, 109, 113, 127, 131, 137, 
+      139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 
+      229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 
+      317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 
+      421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 
+      521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617, 
+      619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709, 719, 727};
+    
+    static void generateSequence(int[] arrayToFill) {
+        Random r = new Random();
+        for (int i = 0; i < arrayToFill.length; ++i) {
+            arrayToFill[i] = PRIMES[r.nextInt(PRIMES.length)];
+        }  
+    }
+}

Propchange: xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Modified: xerces/java/trunk/src/org/apache/xerces/util/SoftReferenceSymbolTable.java
URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/util/SoftReferenceSymbolTable.java?rev=1357381&r1=1357380&r2=1357381&view=diff
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/util/SoftReferenceSymbolTable.java (original)
+++ xerces/java/trunk/src/org/apache/xerces/util/SoftReferenceSymbolTable.java Wed Jul  4 19:56:10 2012
@@ -127,6 +127,7 @@ public class SoftReferenceSymbolTable ex
     public String addSymbol(String symbol) {
         clean();
         // search for identical symbol
+        int collisionCount = 0;
         int bucket = hash(symbol) % fTableSize;
        for (SREntry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
             SREntryData data = (SREntryData)entry.get();
@@ -136,13 +137,20 @@ public class SoftReferenceSymbolTable ex
             if (data.symbol.equals(symbol)) {
                 return data.symbol;
             }
+            ++collisionCount;
         }
         
         if (fCount >= fThreshold) {
             // Rehash the table if the threshold is exceeded
             rehash();
             bucket = hash(symbol) % fTableSize;
-        } 
+        }
+        else if (collisionCount >= fCollisionThreshold) {
+            // Select a new hash function and rehash the table if
+            // the collision threshold is exceeded.
+            rebalance();
+            bucket = hash(symbol) % fTableSize;
+        }
         
         // add new entry
         symbol = symbol.intern();
@@ -165,6 +173,7 @@ public class SoftReferenceSymbolTable ex
     public String addSymbol(char[] buffer, int offset, int length) {
         clean();
         // search for identical symbol
+        int collisionCount = 0;
         int bucket = hash(buffer, offset, length) % fTableSize;
        OUTER: for (SREntry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
             SREntryData data = (SREntryData)entry.get();
@@ -174,18 +183,26 @@ public class SoftReferenceSymbolTable ex
             if (length == data.characters.length) {
                 for (int i = 0; i < length; i++) {
                     if (buffer[offset + i] != data.characters[i]) {
+                        ++collisionCount;
                         continue OUTER;
                     }
                 }
                 return data.symbol;
             }
+            ++collisionCount;
         }
         
         if (fCount >= fThreshold) {
             // Rehash the table if the threshold is exceeded
             rehash();
             bucket = hash(buffer, offset, length) % fTableSize;
-        } 
+        }
+        else if (collisionCount >= fCollisionThreshold) {
+            // Select a new hash function and rehash the table if
+            // the collision threshold is exceeded.
+            rebalance();
+            bucket = hash(buffer, offset, length) % fTableSize;
+        }
         
         // add new entry
         String symbol = new String(buffer, offset, length).intern();
@@ -218,6 +235,20 @@ public class SoftReferenceSymbolTable ex
         rehashCommon(((int) (fCount / fLoadFactor)) * 2 + 1);
     }
     
+    /**
+     * Randomly selects a new hash function and reorganizes this SymbolTable
+     * in order to more evenly distribute its entries across the table. This 
+     * method is called automatically when the number of keys in one of the 
+     * SymbolTable's buckets exceeds the given collision threshold.
+     */
+    protected void rebalance() {
+        if (fHashMultipliers == null) {
+            fHashMultipliers = new int[MULTIPLIERS_SIZE];
+        }
+        PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers);
+        rehashCommon(fBuckets.length);
+    }
+    
     private void rehashCommon(final int newCapacity) {
         
         final int oldCapacity = fBuckets.length;
@@ -235,7 +266,7 @@ public class SoftReferenceSymbolTable ex
 
                 SREntryData data = (SREntryData)e.get();
                 if (data != null) {
-                    int index = hash(data.characters, 0, data.characters.length) % newCapacity;
+                    int index = hash(data.symbol) % newCapacity;
                     if (newTable[index] != null) {
                         newTable[index].prev = e;
                     }

Modified: xerces/java/trunk/src/org/apache/xerces/util/SymbolHash.java
URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/util/SymbolHash.java?rev=1357381&r1=1357380&r2=1357381&view=diff
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/util/SymbolHash.java (original)
+++ xerces/java/trunk/src/org/apache/xerces/util/SymbolHash.java Wed Jul  4 19:56:10 2012
@@ -31,19 +31,34 @@ public class SymbolHash {
     //
     // Constants
     //
-
+    
     /** Default table size. */
-    protected int fTableSize = 101;
+    protected static final int TABLE_SIZE = 101;
+    
+    /** Maximum hash collisions per bucket. */
+    protected static final int MAX_HASH_COLLISIONS = 40;
+    
+    protected static final int MULTIPLIERS_SIZE = 1 << 5;
+    protected static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1;
 
     //
     // Data
     //
+    
+    /** Actual table size **/
+    protected int fTableSize;
 
     /** Buckets. */
     protected Entry[] fBuckets; 
 
     /** Number of elements. */
     protected int fNum = 0;
+    
+    /**
+     * Array of randomly selected hash function multipliers or <code>null</code>
+     * if the default String.hashCode() function should be used.
+     */
+    protected int[] fHashMultipliers;
 
     //
     // Constructors
@@ -51,7 +66,7 @@ public class SymbolHash {
 
     /** Constructs a key table with the default size. */
     public SymbolHash() {
-        fBuckets = new Entry[fTableSize];
+        this(TABLE_SIZE);
     }
 
     /**
@@ -77,26 +92,37 @@ public class SymbolHash {
      * @param value 
      */
     public void put(Object key, Object value) {
+        
+        // search for identical key
+        int collisionCount = 0;
         final int hash = hash(key);
         int bucket = hash % fTableSize;
-        Entry entry = search(key, bucket);
-
-        // replace old value
-        if (entry != null) {
-            entry.value = value;
-        }
-        // create new entry
-        else {
-            if (fNum >= fTableSize) {
-                // Rehash the table if the number of entries
-                // would exceed the number of buckets.
-                rehash();
-                bucket = hash % fTableSize;
+        for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
+            if (key.equals(entry.key)) {
+                // replace old value
+                entry.value = value;
+                return;
             }
-            entry = new Entry(key, value, fBuckets[bucket]);
-            fBuckets[bucket] = entry;
-            fNum++;
+            ++collisionCount;
+        }
+        
+        if (fNum >= fTableSize) {
+            // Rehash the table if the number of entries
+            // would exceed the number of buckets.
+            rehash();
+            bucket = hash % fTableSize;
+        }
+        else if (collisionCount >= MAX_HASH_COLLISIONS && key instanceof String) {
+            // Select a new hash function and rehash the table if
+            // MAX_HASH_COLLISIONS is exceeded.
+            rebalance();
+            bucket = hash(key) % fTableSize;
         }
+        
+        // create new entry
+        Entry entry = new Entry(key, value, fBuckets[bucket]);
+        fBuckets[bucket] = entry;
+        ++fNum;
     }
 
     /**
@@ -161,6 +187,7 @@ public class SymbolHash {
     public SymbolHash makeClone() {
         SymbolHash newTable = new SymbolHash(fTableSize);
         newTable.fNum = fNum;
+        newTable.fHashMultipliers = fHashMultipliers != null ? (int[]) fHashMultipliers.clone() : null;
         for (int i = 0; i < fTableSize; i++) {
             if (fBuckets[i] != null) {
                 newTable.fBuckets[i] = fBuckets[i].makeClone();
@@ -178,6 +205,7 @@ public class SymbolHash {
             fBuckets[i] = null;
         }
         fNum = 0;
+        fHashMultipliers = null;
     } // clear():  void
 
     protected Entry search(Object key, int bucket) {
@@ -195,8 +223,21 @@ public class SymbolHash {
      * @param key The key to hash.
      */
     protected int hash(Object key) {
-        return key.hashCode() & 0x7FFFFFFF;
-    }
+        if (fHashMultipliers == null || !(key instanceof String)) {
+            return key.hashCode() & 0x7FFFFFFF;
+        }
+        return hash0((String) key);
+    } // hash(Object):int
+    
+    private int hash0(String symbol) {
+        int code = 0;
+        final int length = symbol.length();
+        final int[] multipliers = fHashMultipliers;
+        for (int i = 0; i < length; ++i) {
+            code = code * multipliers[i & MULTIPLIERS_MASK] + symbol.charAt(i);
+        }
+        return code & 0x7FFFFFFF;
+    } // hash0(String):int
     
     /**
      * Increases the capacity of and internally reorganizes this 
@@ -205,11 +246,28 @@ public class SymbolHash {
      * number of keys in the SymbolHash exceeds its number of buckets.
      */
     protected void rehash() {
-
+        rehashCommon((fBuckets.length << 1) + 1);
+    }
+    
+    /**
+     * Randomly selects a new hash function and reorganizes this SymbolHash
+     * in order to more evenly distribute its entries across the table. This 
+     * method is called automatically when the number of keys in one of the 
+     * SymbolHash's buckets exceeds MAX_HASH_COLLISIONS.
+     */
+    protected void rebalance() {
+        if (fHashMultipliers == null) {
+            fHashMultipliers = new int[MULTIPLIERS_SIZE];
+        }
+        PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers);
+        rehashCommon(fBuckets.length);
+    }
+    
+    private void rehashCommon(final int newCapacity) {
+        
         final int oldCapacity = fBuckets.length;
         final Entry[] oldTable = fBuckets;
 
-        final int newCapacity = (oldCapacity << 1) + 1;
         final Entry[] newTable = new Entry[newCapacity];
 
         fBuckets = newTable;

Modified: xerces/java/trunk/src/org/apache/xerces/util/SymbolTable.java
URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/util/SymbolTable.java?rev=1357381&r1=1357380&r2=1357381&view=diff
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/util/SymbolTable.java (original)
+++ xerces/java/trunk/src/org/apache/xerces/util/SymbolTable.java Wed Jul  4 19:56:10 2012
@@ -84,6 +84,12 @@ public class SymbolTable {
 
     /** Default table size. */
     protected static final int TABLE_SIZE = 101;
+    
+    /** Maximum hash collisions per bucket for a table with load factor == 1. */
+    protected static final int MAX_HASH_COLLISIONS = 40;
+    
+    protected static final int MULTIPLIERS_SIZE = 1 << 5;
+    protected static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1;
 
     //
     // Data
@@ -104,6 +110,18 @@ public class SymbolTable {
                                                         
     /** The load factor for the SymbolTable. */
     protected float fLoadFactor;
+    
+    /**
+     * A new hash function is selected and the table is rehashed when
+     * the number of keys in the bucket exceeds this threshold.
+     */
+    protected final int fCollisionThreshold;
+    
+    /**
+     * Array of randomly selected hash function multipliers or <code>null</code>
+     * if the default String.hashCode() function should be used.
+     */
+    protected int[] fHashMultipliers;
 
     //
     // Constructors
@@ -136,6 +154,7 @@ public class SymbolTable {
         fTableSize = initialCapacity;
         fBuckets = new Entry[fTableSize];
         fThreshold = (int)(fTableSize * loadFactor);
+        fCollisionThreshold = (int)(MAX_HASH_COLLISIONS * loadFactor);
         fCount = 0;
     }
 
@@ -174,18 +193,26 @@ public class SymbolTable {
     public String addSymbol(String symbol) {
         
         // search for identical symbol
+        int collisionCount = 0;
         int bucket = hash(symbol) % fTableSize;
        for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
             if (entry.symbol.equals(symbol)) {
                 return entry.symbol;
             }
+            ++collisionCount;
         }
         
         if (fCount >= fThreshold) {
             // Rehash the table if the threshold is exceeded
             rehash();
             bucket = hash(symbol) % fTableSize;
-        } 
+        }
+        else if (collisionCount >= fCollisionThreshold) {
+            // Select a new hash function and rehash the table if
+            // the collision threshold is exceeded.
+            rebalance();
+            bucket = hash(symbol) % fTableSize;
+        }
         
         // create new entry
         Entry entry = new Entry(symbol, fBuckets[bucket]);
@@ -208,23 +235,32 @@ public class SymbolTable {
     public String addSymbol(char[] buffer, int offset, int length) {
         
         // search for identical symbol
+        int collisionCount = 0;
         int bucket = hash(buffer, offset, length) % fTableSize;
        OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
             if (length == entry.characters.length) {
                 for (int i = 0; i < length; i++) {
                     if (buffer[offset + i] != entry.characters[i]) {
+                        ++collisionCount;
                         continue OUTER;
                     }
                 }
                 return entry.symbol;
             }
+            ++collisionCount;
         }
         
         if (fCount >= fThreshold) {
             // Rehash the table if the threshold is exceeded
             rehash();
             bucket = hash(buffer, offset, length) % fTableSize;
-        } 
+        }
+        else if (collisionCount >= fCollisionThreshold) {
+            // Select a new hash function and rehash the table if
+            // the collision threshold is exceeded.
+            rebalance();
+            bucket = hash(buffer, offset, length) % fTableSize;
+        }
         
         // add new entry
         Entry entry = new Entry(buffer, offset, length, fBuckets[bucket]);
@@ -243,8 +279,21 @@ public class SymbolTable {
      * @param symbol The symbol to hash.
      */
     public int hash(String symbol) {
-        return symbol.hashCode() & 0x7FFFFFFF;
+        if (fHashMultipliers == null) {
+            return symbol.hashCode() & 0x7FFFFFFF;
+        }
+        return hash0(symbol);
     } // hash(String):int
+    
+    private int hash0(String symbol) {
+        int code = 0;
+        final int length = symbol.length();
+        final int[] multipliers = fHashMultipliers;
+        for (int i = 0; i < length; ++i) {
+            code = code * multipliers[i & MULTIPLIERS_MASK] + symbol.charAt(i);
+        }
+        return code & 0x7FFFFFFF;
+    } // hash0(String):int
 
     /**
      * Returns a hashcode value for the specified symbol information.
@@ -258,14 +307,25 @@ public class SymbolTable {
      * @param length The length of the symbol.
      */
     public int hash(char[] buffer, int offset, int length) {
+        if (fHashMultipliers == null) {
+            int code = 0;
+            for (int i = 0; i < length; ++i) {
+                code = code * 31 + buffer[offset + i];
+            }
+            return code & 0x7FFFFFFF;
+        }
+        return hash0(buffer, offset, length);
 
+    } // hash(char[],int,int):int
+    
+    private int hash0(char[] buffer, int offset, int length) {
         int code = 0;
+        final int[] multipliers = fHashMultipliers;
         for (int i = 0; i < length; ++i) {
-            code = code * 31 + buffer[offset + i];
+            code = code * multipliers[i & MULTIPLIERS_MASK] + buffer[offset + i];
         }
         return code & 0x7FFFFFFF;
-
-    } // hash(char[],int,int):int
+    } // hash0(char[],int,int):int
 
     /**
      * Increases the capacity of and internally reorganizes this 
@@ -275,11 +335,28 @@ public class SymbolTable {
      * and load factor. 
      */
     protected void rehash() {
-
+        rehashCommon(fBuckets.length * 2 + 1);
+    }
+    
+    /**
+     * Randomly selects a new hash function and reorganizes this SymbolTable
+     * in order to more evenly distribute its entries across the table. This 
+     * method is called automatically when the number of keys in one of the 
+     * SymbolTable's buckets exceeds the given collision threshold.
+     */
+    protected void rebalance() {
+        if (fHashMultipliers == null) {
+            fHashMultipliers = new int[MULTIPLIERS_SIZE];
+        }
+        PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers);
+        rehashCommon(fBuckets.length);
+    }
+    
+    private void rehashCommon(final int newCapacity) {
+        
         int oldCapacity = fBuckets.length;
         Entry[] oldTable = fBuckets;
 
-        int newCapacity = oldCapacity * 2 + 1;
         Entry[] newTable = new Entry[newCapacity];
 
         fThreshold = (int)(newCapacity * fLoadFactor);
@@ -291,7 +368,7 @@ public class SymbolTable {
                 Entry e = old;
                 old = old.next;
 
-                int index = hash(e.characters, 0, e.characters.length) % newCapacity;
+                int index = hash(e.symbol) % newCapacity;
                 e.next = newTable[index];
                 newTable[index] = e;
             }

Modified: xerces/java/trunk/src/org/apache/xerces/util/XMLAttributesImpl.java
URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/util/XMLAttributesImpl.java?rev=1357381&r1=1357380&r2=1357381&view=diff
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/util/XMLAttributesImpl.java (original)
+++ xerces/java/trunk/src/org/apache/xerces/util/XMLAttributesImpl.java Wed Jul  4 19:56:10 2012
@@ -50,6 +50,12 @@ public class XMLAttributesImpl
     /** Default table size. */
     protected static final int TABLE_SIZE = 101;
     
+    /** Maximum hash collisions per bucket. */
+    protected static final int MAX_HASH_COLLISIONS = 40;
+    
+    protected static final int MULTIPLIERS_SIZE = 1 << 5;
+    protected static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1;
+    
     /** 
      * Threshold at which an instance is treated
      * as a large attribute list.
@@ -103,6 +109,12 @@ public class XMLAttributesImpl
      * Indicates whether the table view contains consistent data.
      */
     protected boolean fIsTableViewConsistent;
+    
+    /**
+     * Array of randomly selected hash function multipliers or <code>null</code>
+     * if the default String.hashCode() function should be used.
+     */
+    protected int[] fHashMultipliers;
 
     //
     // Constructors
@@ -203,7 +215,8 @@ public class XMLAttributesImpl
              * the user of this class adds attributes, removes them, and
              * then adds more.
              */
-            if (!fIsTableViewConsistent || fLength == SIZE_LIMIT) {
+            if (!fIsTableViewConsistent || fLength == SIZE_LIMIT || 
+                (fLength > SIZE_LIMIT && fLength > fTableViewBuckets)) {
                 prepareAndPopulateTableView();
                 fIsTableViewConsistent = true;
             }
@@ -232,12 +245,14 @@ public class XMLAttributesImpl
             // We need to check if any of the attributes has the same rawname.
             else {
                 // Search the table.
+                int collisionCount = 0;
                 Attribute found = fAttributeTableView[bucket];
                 while (found != null) {
                     if (found.name.rawname == name.rawname) {
                         break;
                     }
                     found = found.next;
+                    ++collisionCount;
                 }
                 // This attribute is unique.
                 if (found == null) {
@@ -250,10 +265,19 @@ public class XMLAttributesImpl
                         }
                         fAttributes = attributes;
                     }
-                
-                    // Update table view
-                    fAttributes[index].next = fAttributeTableView[bucket];
-                    fAttributeTableView[bucket] = fAttributes[index];
+                    // Select a new hash function and rehash the table view
+                    // if the collision threshold is exceeded.
+                    if (collisionCount >= MAX_HASH_COLLISIONS) {
+                        // The current attribute will be processed in the rehash.
+                        // Need to set its name first.
+                        fAttributes[index].name.setValues(name);
+                        rebalanceTableView(fLength);
+                    }
+                    else {
+                        // Update table view
+                        fAttributes[index].next = fAttributeTableView[bucket];
+                        fAttributeTableView[bucket] = fAttributes[index];
+                    }
                 }
                 // Duplicate. We still need to find the index.
                 else {
@@ -829,60 +853,84 @@ public class XMLAttributesImpl
      */
     public QName checkDuplicatesNS() {
         // If the list is small check for duplicates using pairwise comparison.
-        if (fLength <= SIZE_LIMIT) {
-            for (int i = 0; i < fLength - 1; ++i) {
-               Attribute att1 = fAttributes[i];
-                for (int j = i + 1; j < fLength; ++j) {
-                    Attribute att2 = fAttributes[j];
+        final int length = fLength;
+        if (length <= SIZE_LIMIT) {
+            final Attribute[] attributes = fAttributes;
+            for (int i = 0; i < length - 1; ++i) {
+               Attribute att1 = attributes[i];
+                for (int j = i + 1; j < length; ++j) {
+                    Attribute att2 = attributes[j];
                     if (att1.name.localpart == att2.name.localpart &&
                         att1.name.uri == att2.name.uri) {
-                        return att2.name;      
+                        return att2.name;
                     }
                 }
             }
+            return null;
        }
        // If the list is large check duplicates using a hash table.
        else {
-            // We don't want this table view to be read if someone calls 
-            // addAttribute so we invalidate it up front.
-            fIsTableViewConsistent = false;
-
-            prepareTableView();
-
-            Attribute attr;
-            int bucket;
-
-            for (int i = fLength - 1; i >= 0; --i) {
-                attr = fAttributes[i];
-                bucket = getTableViewBucket(attr.name.localpart, attr.name.uri);
+           return checkManyDuplicatesNS();
+       }
+    }
+    
+    private QName checkManyDuplicatesNS() {
+        // We don't want this table view to be read if someone calls 
+        // addAttribute so we invalidate it up front.
+        fIsTableViewConsistent = false;
+
+        prepareTableView();
+
+        Attribute attr;
+        int bucket;
+        
+        final int length = fLength;
+        final Attribute[] attributes = fAttributes;
+        final Attribute[] attributeTableView = fAttributeTableView;
+        final int[] attributeTableViewChainState = fAttributeTableViewChainState;
+        int largeCount = fLargeCount;
+
+        for (int i = 0; i < length; ++i) {
+            attr = attributes[i];
+            bucket = getTableViewBucket(attr.name.localpart, attr.name.uri);
+            
+            // The chain is stale. 
+            // This must be a unique attribute.
+            if (attributeTableViewChainState[bucket] != largeCount) {
+                attributeTableViewChainState[bucket] = largeCount;
+                attr.next = null;
+                attributeTableView[bucket] = attr;
+            } 
+            // This chain is active. 
+            // We need to check if any of the attributes has the same name.
+            else {
+                // Search the table.
+                int collisionCount = 0;
+                Attribute found = attributeTableView[bucket];
+                while (found != null) {
+                    if (found.name.localpart == attr.name.localpart &&
+                        found.name.uri == attr.name.uri) {
+                        return attr.name;
+                    }
+                    found = found.next;
+                    ++collisionCount;
+                }
                 
-                // The chain is stale. 
-                // This must be a unique attribute.
-                if (fAttributeTableViewChainState[bucket] != fLargeCount) {
-                    fAttributeTableViewChainState[bucket] = fLargeCount;
-                    attr.next = null;
-                    fAttributeTableView[bucket] = attr;
-                } 
-                // This chain is active. 
-                // We need to check if any of the attributes has the same name.
+                // Select a new hash function and rehash the table view
+                // if the collision threshold is exceeded.
+                if (collisionCount >= MAX_HASH_COLLISIONS) {
+                    // The current attribute will be processed in the rehash.
+                    rebalanceTableViewNS(i+1);
+                    largeCount = fLargeCount;
+                }
                 else {
-                    // Search the table.
-                    Attribute found = fAttributeTableView[bucket];
-                    while (found != null) {
-                        if (found.name.localpart == attr.name.localpart &&
-                            found.name.uri == attr.name.uri) {
-                            return attr.name;
-                        }
-                        found = found.next;
-                    }
-                    
                     // Update table view
-                    attr.next = fAttributeTableView[bucket];
-                    fAttributeTableView[bucket] = attr;
+                    attr.next = attributeTableView[bucket];
+                    attributeTableView[bucket] = attr;
                 }
             }
-       }
-       return null;
+        }
+        return null;
     }
     
     /**
@@ -933,7 +981,7 @@ public class XMLAttributesImpl
      * would be hashed
      */
     protected int getTableViewBucket(String qname) {
-        return (qname.hashCode() & 0x7FFFFFFF) % fTableViewBuckets;
+        return (hash(qname) & 0x7FFFFFFF) % fTableViewBuckets;
     }
     
     /**
@@ -947,13 +995,36 @@ public class XMLAttributesImpl
      */
     protected int getTableViewBucket(String localpart, String uri) {
         if (uri == null) {
-            return (localpart.hashCode() & 0x7FFFFFFF) % fTableViewBuckets;
+            return (hash(localpart) & 0x7FFFFFFF) % fTableViewBuckets;
         }
         else {
-            return ((localpart.hashCode() + uri.hashCode()) 
-               & 0x7FFFFFFF) % fTableViewBuckets;
+            return (hash(localpart, uri) & 0x7FFFFFFF) % fTableViewBuckets;
         }
     }
+    
+    private int hash(String localpart) {
+        if (fHashMultipliers == null) {
+            return localpart.hashCode();
+        }
+        return hash0(localpart);
+    } // hash(String):int
+    
+    private int hash(String localpart, String uri) {
+        if (fHashMultipliers == null) {
+            return localpart.hashCode() + uri.hashCode() * 31;
+        }
+        return hash0(localpart) + hash0(uri) * fHashMultipliers[MULTIPLIERS_SIZE];
+    } // hash(String,String):int
+    
+    private int hash0(String symbol) {
+        int code = 0;
+        final int length = symbol.length();
+        final int[] multipliers = fHashMultipliers;
+        for (int i = 0; i < length; ++i) {
+            code = code * multipliers[i & MULTIPLIERS_MASK] + symbol.charAt(i);
+        }
+        return code;
+    } // hash0(String):int
        
     /**
      * Purges all elements from the table view.
@@ -971,9 +1042,31 @@ public class XMLAttributesImpl
     }
     
     /**
+     * Increases the capacity of the table view.
+     */
+    private void growTableView() {
+        final int length = fLength;
+        int tableViewBuckets = fTableViewBuckets;
+        do {
+            tableViewBuckets = (tableViewBuckets << 1) + 1;
+            if (tableViewBuckets < 0) {
+                tableViewBuckets = Integer.MAX_VALUE;
+                break;
+            } 
+        }
+        while (length > tableViewBuckets);
+        fTableViewBuckets = tableViewBuckets;
+        fAttributeTableView = null;
+        fLargeCount = 1;
+    }
+    
+    /**
      * Prepares the table view of the attributes list for use.
      */
     protected void prepareTableView() {
+        if (fLength > fTableViewBuckets) {
+            growTableView();
+        }
         if (fAttributeTableView == null) {
             fAttributeTableView = new Attribute[fTableViewBuckets];
             fAttributeTableViewChainState = new int[fTableViewBuckets];
@@ -989,11 +1082,15 @@ public class XMLAttributesImpl
      * previously read.
      */
     protected void prepareAndPopulateTableView() {
+        prepareAndPopulateTableView(fLength);
+    }
+    
+    private void prepareAndPopulateTableView(final int count) {
         prepareTableView();
-        // Need to populate the hash table with the attributes we've scanned so far.
+        // Need to populate the hash table with the attributes we've processed so far.
         Attribute attr;
         int bucket;
-        for (int i = 0; i < fLength; ++i) {
+        for (int i = 0; i < count; ++i) {
             attr = fAttributes[i];
             bucket = getTableViewBucket(attr.name.rawname);
             if (fAttributeTableViewChainState[bucket] != fLargeCount) {
@@ -1008,6 +1105,55 @@ public class XMLAttributesImpl
             }
         }
     }
+    
+    private void prepareAndPopulateTableViewNS(final int count) {
+        prepareTableView();
+        // Need to populate the hash table with the attributes we've processed so far.
+        Attribute attr;
+        int bucket;
+        for (int i = 0; i < count; ++i) {
+            attr = fAttributes[i];
+            bucket = getTableViewBucket(attr.name.localpart, attr.name.uri);
+            if (fAttributeTableViewChainState[bucket] != fLargeCount) {
+                fAttributeTableViewChainState[bucket] = fLargeCount;
+                attr.next = null;
+                fAttributeTableView[bucket] = attr;
+            } 
+            else {
+                // Update table view
+                attr.next = fAttributeTableView[bucket];
+                fAttributeTableView[bucket] = attr;
+            }
+        }
+    }
+    
+    /**
+     * Randomly selects a new hash function and reorganizes the table view
+     * in order to more evenly distribute its entries. This method is called
+     * automatically when the number of attributes in one bucket exceeds
+     * MAX_HASH_COLLISIONS.
+     */
+    private void rebalanceTableView(final int count) {
+        if (fHashMultipliers == null) {
+            fHashMultipliers = new int[MULTIPLIERS_SIZE + 1];
+        }
+        PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers);
+        prepareAndPopulateTableView(count);
+    }
+    
+    /**
+     * Randomly selects a new hash function and reorganizes the table view
+     * in order to more evenly distribute its entries. This method is called
+     * automatically when the number of attributes in one bucket exceeds
+     * MAX_HASH_COLLISIONS.
+     */
+    private void rebalanceTableViewNS(final int count) {
+        if (fHashMultipliers == null) {
+            fHashMultipliers = new int[MULTIPLIERS_SIZE + 1];
+        }
+        PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers);
+        prepareAndPopulateTableViewNS(count);
+    }
 
     //
     // Classes


